From fe422a45af0fb85b682bd4c108f50e7f248d9161 Mon Sep 17 00:00:00 2001
From: Jinank Jain
Date: Fri, 20 Jun 2025 18:10:52 +0530
Subject: [PATCH 001/294] build: Move away from actions-rs/cross

Since actions-rs/cross is deprecated, move to
houseabsolute/actions-rust-cross. We should pin the cross-version to the
latest version to fix the build issues with the virtio-bindings crate.

Signed-off-by: Jinank Jain
---
.github/workflows/quality.yaml | 91 +++++++++++++++++++---------------
1 file changed, 50 insertions(+), 41 deletions(-)

diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml
index 19a4981a4d..0ece7fa690 100644
--- a/.github/workflows/quality.yaml
+++ b/.github/workflows/quality.yaml
@@ -50,88 +50,97 @@ jobs:
git checkout ${{ github.sha }}

- name: Clippy (kvm)
- uses: actions-rs/cargo@v1
+ uses: houseabsolute/actions-rust-cross@v1
with:
- use-cross: ${{ matrix.target != 'x86_64-unknown-linux-gnu' }}
command: clippy
- args: --target=${{ matrix.target }} --locked --all --all-targets --no-default-features --tests --examples --features "kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states
+ cross-version: 3e0957637b49b1bbced23ad909170650c5b70635
+ toolchain: ${{ matrix.rust }}
+ target: ${{ matrix.target }}
+ args: --locked --all --all-targets --no-default-features --tests --examples --features "kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states

- name: Clippy (mshv)
- uses: actions-rs/cargo@v1
+ uses: houseabsolute/actions-rust-cross@v1
with:
- use-cross: ${{ matrix.target != 'x86_64-unknown-linux-gnu' }}
command: clippy
- args: --target=${{ matrix.target }} --locked --all --all-targets --no-default-features --tests --examples --features "mshv" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states
+ cross-version: 3e0957637b49b1bbced23ad909170650c5b70635
+ toolchain: ${{ matrix.rust }}
+ target: ${{ matrix.target }}
+ args: --locked --all --all-targets --no-default-features --tests --examples --features "mshv" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states

- name: Clippy (mshv + kvm)
- uses: actions-rs/cargo@v1
+ uses: houseabsolute/actions-rust-cross@v1
with:
- use-cross: ${{ matrix.target != 'x86_64-unknown-linux-gnu' }}
command: clippy
- args: --target=${{ matrix.target }} --locked --all --all-targets --no-default-features --tests --examples --features "mshv,kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states
+ cross-version: 3e0957637b49b1bbced23ad909170650c5b70635
+ toolchain: ${{ matrix.rust }}
+ target: ${{ matrix.target }}
+ args: --locked --all --all-targets --no-default-features --tests --examples --features "mshv,kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states

- name: Clippy (default features)
- uses: actions-rs/cargo@v1
+ uses: houseabsolute/actions-rust-cross@v1
with:
- use-cross: ${{ matrix.target != 'x86_64-unknown-linux-gnu' }}
command: clippy
- args: --target=${{ matrix.target }} --locked --all --all-targets --tests --examples -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states
+ cross-version: 3e0957637b49b1bbced23ad909170650c5b70635
+ toolchain: ${{ matrix.rust }}
+ target: ${{ matrix.target }}
+ args: --locked --all --all-targets --tests --examples -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states

- 
name: Clippy (default features + guest_debug) - uses: actions-rs/cargo@v1 + uses: houseabsolute/actions-rust-cross@v1 with: - use-cross: ${{ matrix.target != 'x86_64-unknown-linux-gnu' }} command: clippy - args: --target=${{ matrix.target }} --locked --all --all-targets --tests --examples --features "guest_debug" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 + toolchain: ${{ matrix.rust }} + target: ${{ matrix.target }} + args: --locked --all --all-targets --tests --examples --features "guest_debug" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states - name: Clippy (default features + pvmemcontrol) - uses: actions-rs/cargo@v1 + uses: houseabsolute/actions-rust-cross@v1 with: - use-cross: ${{ matrix.target != 'x86_64-unknown-linux-gnu' }} command: clippy - args: --target=${{ matrix.target }} --locked --all --all-targets --tests --examples --features "pvmemcontrol" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 + toolchain: ${{ matrix.rust }} + target: ${{ matrix.target }} + args: --locked --all --all-targets --tests --examples --features "pvmemcontrol" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states - name: Clippy (default features + tracing) - uses: actions-rs/cargo@v1 - with: - use-cross: ${{ matrix.target != 'x86_64-unknown-linux-gnu' }} - command: clippy - args: --target=${{ matrix.target }} --locked --all --all-targets --tests --examples --features "tracing" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states - - - name: Clippy (mshv) - if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} - uses: actions-rs/cargo@v1 - with: - command: clippy - args: --target=${{ matrix.target }} --locked --all --all-targets --no-default-features --tests --examples --features "mshv" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states - - - name: Clippy (mshv + kvm) - if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} - uses: actions-rs/cargo@v1 + uses: houseabsolute/actions-rust-cross@v1 with: command: clippy - args: --target=${{ matrix.target }} --locked --all --all-targets --no-default-features --tests --examples --features "mshv,kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 + toolchain: ${{ matrix.rust }} + target: ${{ matrix.target }} + args: --locked --all --all-targets --tests --examples --features "tracing" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states - name: Clippy (sev_snp) if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} - uses: actions-rs/cargo@v1 + uses: houseabsolute/actions-rust-cross@v1 with: command: clippy - args: --target=${{ matrix.target }} --locked --all --all-targets --no-default-features --tests --examples --features "sev_snp" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 + toolchain: ${{ matrix.rust }} + target: ${{ matrix.target }} + args: --locked --all --all-targets --no-default-features --tests --examples --features "sev_snp" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states - name: Clippy (igvm) if: ${{ 
matrix.target == 'x86_64-unknown-linux-gnu' }}
- uses: actions-rs/cargo@v1
+ uses: houseabsolute/actions-rust-cross@v1
with:
command: clippy
- args: --target=${{ matrix.target }} --locked --all --all-targets --no-default-features --tests --examples --features "igvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states
+ cross-version: 3e0957637b49b1bbced23ad909170650c5b70635
+ toolchain: ${{ matrix.rust }}
+ target: ${{ matrix.target }}
+ args: --locked --all --all-targets --no-default-features --tests --examples --features "igvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states

- name: Clippy (kvm + tdx)
if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }}
- uses: actions-rs/cargo@v1
+ uses: houseabsolute/actions-rust-cross@v1
with:
command: clippy
- args: --target=${{ matrix.target }} --locked --all --all-targets --no-default-features --tests --examples --features "tdx,kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states
+ cross-version: 3e0957637b49b1bbced23ad909170650c5b70635
+ toolchain: ${{ matrix.rust }}
+ target: ${{ matrix.target }}
+ args: --locked --all --all-targets --no-default-features --tests --examples --features "tdx,kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states

- name: Check build did not modify any files
run: test -z "$(git status --porcelain)"

From 190d90196fff389b60b93b57acf958957b71b249 Mon Sep 17 00:00:00 2001
From: Jinank Jain
Date: Tue, 10 Jun 2025 15:14:00 +0530
Subject: [PATCH 002/294] build: Bump vfio and all the dependent crates to latest version

Recently the vfio crates have moved to crates.io, so we should start
consuming them from crates.io instead of a git URL. This results in
better versioning instead of tracking a git commit sha.
Signed-off-by: Jinank Jain --- Cargo.lock | 69 +++++----- Cargo.toml | 20 +-- block/Cargo.toml | 2 +- block/src/lib.rs | 2 +- fuzz/Cargo.lock | 130 +++++++++--------- fuzz/Cargo.toml | 4 +- hypervisor/src/kvm/mod.rs | 36 ++--- virtio-devices/Cargo.toml | 2 +- virtio-devices/src/vdpa.rs | 5 +- .../src/vhost_user/vu_common_ctrl.rs | 5 +- vm-virtio/src/queue.rs | 3 +- 11 files changed, 135 insertions(+), 143 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index afa7538716..afd186fa63 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1042,20 +1042,20 @@ dependencies = [ [[package]] name = "kvm-bindings" -version = "0.10.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4933174d0cc4b77b958578cd45784071cc5ae212c2d78fbd755aaaa6dfa71a" +checksum = "d4b153a59bb3ca930ff8148655b2ef68c34259a623ae08cf2fb9b570b2e45363" dependencies = [ "serde", "vmm-sys-util", - "zerocopy 0.7.35", + "zerocopy 0.8.26", ] [[package]] name = "kvm-ioctls" -version = "0.19.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e013ae7fcd2c6a8f384104d16afe7ea02969301ea2bb2a56e44b011ebc907cab" +checksum = "b702df98508cb63ad89dd9beb9f6409761b30edca10d48e57941d3f11513a006" dependencies = [ "bitflags 2.9.0", "kvm-bindings", @@ -1183,7 +1183,7 @@ dependencies = [ [[package]] name = "micro_http" version = "0.1.0" -source = "git+https://github.com/firecracker-microvm/micro-http?branch=main#4f621532e81ee2ad096a9c9592fdacc40d19de48" +source = "git+https://github.com/firecracker-microvm/micro-http?branch=main#bf5098916006912f8dd35aaa6daa5579c6c297b2" dependencies = [ "libc", "vmm-sys-util", @@ -2188,21 +2188,9 @@ dependencies = [ "getrandom 0.3.3", "js-sys", "rand", - "uuid-macro-internal", "wasm-bindgen", ] -[[package]] -name = "uuid-macro-internal" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b682e8c381995ea03130e381928e0e005b7c9eb483c6c8682f50e07b33c2b7" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "vcpkg" version = "0.2.15" @@ -2211,16 +2199,18 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "vfio-bindings" -version = "0.4.0" -source = "git+https://github.com/rust-vmm/vfio?branch=main#3d158a14460cac7ca3c99c2effa0a46880935cb0" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b565663f62e091ca47db9a674c8c95c9686a000e82970f391a3cacf6470ff060" dependencies = [ "vmm-sys-util", ] [[package]] name = "vfio-ioctls" -version = "0.4.0" -source = "git+https://github.com/rust-vmm/vfio?branch=main#3d158a14460cac7ca3c99c2effa0a46880935cb0" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61316b5e308faa8ed4a87c4130256f765e46de3442eb2e2e619840ef73456738" dependencies = [ "byteorder", "kvm-bindings", @@ -2229,7 +2219,7 @@ dependencies = [ "log", "mshv-bindings", "mshv-ioctls", - "thiserror 1.0.62", + "thiserror 2.0.12", "vfio-bindings", "vm-memory", "vmm-sys-util", @@ -2238,15 +2228,16 @@ dependencies = [ [[package]] name = "vfio_user" version = "0.1.0" -source = "git+https://github.com/rust-vmm/vfio-user?branch=main#3febcdd3fa2531623865663ca1721e1962ed9979" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed81c5ed8224d468a322e923777ed0615cad433fe61177126098af995f89cecf" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.9.0", "libc", "log", "serde", "serde_derive", "serde_json", - 
"thiserror 1.0.62", + "thiserror 2.0.12", "vfio-bindings", "vm-memory", "vmm-sys-util", @@ -2254,8 +2245,9 @@ dependencies = [ [[package]] name = "vhost" -version = "0.12.1" -source = "git+https://github.com/rust-vmm/vhost?rev=d983ae0#d983ae07f78663b7d24059667376992460b571a2" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a4dcad85a129d97d5d4b2f3c47a4affdeedd76bdcd02094bcb5d9b76cac2d05" dependencies = [ "bitflags 2.9.0", "libc", @@ -2266,8 +2258,9 @@ dependencies = [ [[package]] name = "vhost-user-backend" -version = "0.16.1" -source = "git+https://github.com/rust-vmm/vhost?rev=d983ae0#d983ae07f78663b7d24059667376992460b571a2" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e183205a9ba7cb9c47fcb0fc0a07fc295a110efbb11ab78ad0d793b0a38a7bde" dependencies = [ "libc", "log", @@ -2319,9 +2312,9 @@ dependencies = [ [[package]] name = "virtio-bindings" -version = "0.2.4" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1711e61c00f8cb450bd15368152a1e37a12ef195008ddc7d0f4812f9e2b30a68" +checksum = "804f498a26d5a63be7bbb8bdcd3869c3f286c4c4a17108905276454da0caf8cb" [[package]] name = "virtio-devices" @@ -2359,9 +2352,9 @@ dependencies = [ [[package]] name = "virtio-queue" -version = "0.14.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "872e2f3fbd70a7e6f01689720cce3d5c2c5efe52b484dd07b674246ada0e9a8d" +checksum = "fb0479158f863e59323771a1f684d843962f76960b86fecfec2bfa9c8f0f9180" dependencies = [ "log", "virtio-bindings", @@ -2398,9 +2391,9 @@ source = "git+https://github.com/rust-vmm/vm-fdt?branch=main#ef5bd734f5f66fb0772 [[package]] name = "vm-memory" -version = "0.16.1" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1720e7240cdc739f935456eb77f370d7e9b2a3909204da1e2b47bef1137a013" +checksum = "1fd5e56d48353c5f54ef50bd158a0452fc82f5383da840f7b8efc31695dd3b9d" dependencies = [ "arc-swap", "libc", @@ -2489,9 +2482,9 @@ dependencies = [ [[package]] name = "vmm-sys-util" -version = "0.12.1" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede" +checksum = "d21f366bf22bfba3e868349978766a965cbe628c323d58e026be80b8357ab789" dependencies = [ "bitflags 1.3.2", "libc", diff --git a/Cargo.toml b/Cargo.toml index fefbd227e4..0cc8e53a9b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -103,22 +103,22 @@ members = [ [workspace.dependencies] # rust-vmm crates acpi_tables = { git = "https://github.com/rust-vmm/acpi_tables", branch = "main" } -kvm-bindings = "0.10.0" -kvm-ioctls = "0.19.1" +kvm-bindings = "0.12.0" +kvm-ioctls = "0.22.0" linux-loader = "0.13.0" mshv-bindings = "0.5.1" mshv-ioctls = "0.5.1" seccompiler = "0.5.0" -vfio-bindings = { git = "https://github.com/rust-vmm/vfio", branch = "main" } -vfio-ioctls = { git = "https://github.com/rust-vmm/vfio", branch = "main", default-features = false } -vfio_user = { git = "https://github.com/rust-vmm/vfio-user", branch = "main" } -vhost = { git = "https://github.com/rust-vmm/vhost", rev = "d983ae0" } -vhost-user-backend = { git = "https://github.com/rust-vmm/vhost", rev = "d983ae0" } -virtio-bindings = "0.2.4" -virtio-queue = "0.14.0" +vfio-bindings = { version = "0.5.0", default-features = false } +vfio-ioctls = { version = "0.5.0", default-features = false } +vfio_user = { version = "0.1.0", default-features 
= false } +vhost = { version = "0.14.0", default-features = false } +vhost-user-backend = { version = "0.20.0", default-features = false } +virtio-bindings = "0.2.6" +virtio-queue = "0.16.0" vm-fdt = { git = "https://github.com/rust-vmm/vm-fdt", branch = "main" } vm-memory = "0.16.1" -vmm-sys-util = "0.12.1" +vmm-sys-util = "0.14.0" # igvm crates # TODO: bump to 0.3.5 release diff --git a/block/Cargo.toml b/block/Cargo.toml index 02bf37eb03..48551251a5 100644 --- a/block/Cargo.toml +++ b/block/Cargo.toml @@ -19,7 +19,7 @@ serde = { version = "1.0.208", features = ["derive"] } smallvec = "1.13.2" thiserror = { workspace = true } uuid = { workspace = true, features = ["v4"] } -virtio-bindings = { workspace = true, features = ["virtio-v5_0_0"] } +virtio-bindings = { workspace = true } virtio-queue = { workspace = true } vm-memory = { workspace = true, features = [ "backend-atomic", diff --git a/block/src/lib.rs b/block/src/lib.rs index 1424848ba3..b081c87734 100644 --- a/block/src/lib.rs +++ b/block/src/lib.rs @@ -57,7 +57,7 @@ use vm_memory::{ }; use vm_virtio::{AccessPlatform, Translatable}; use vmm_sys_util::eventfd::EventFd; -use vmm_sys_util::{aio, ioctl_io_nr, ioctl_ioc_nr}; +use vmm_sys_util::{aio, ioctl_io_nr}; use crate::async_io::{AsyncIo, AsyncIoError, AsyncIoResult}; use crate::vhdx::VhdxError; diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 967c057442..85596958ce 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -7,7 +7,7 @@ name = "acpi_tables" version = "0.1.0" source = "git+https://github.com/rust-vmm/acpi_tables?branch=main#e08a3f0b0a59b98859dbf59f5aa7fd4d2eb4018a" dependencies = [ - "zerocopy 0.8.24", + "zerocopy 0.8.26", ] [[package]] @@ -480,7 +480,7 @@ dependencies = [ "vfio-ioctls", "vm-memory", "vmm-sys-util", - "zerocopy 0.8.24", + "zerocopy 0.8.26", ] [[package]] @@ -531,29 +531,30 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ + "once_cell", "wasm-bindgen", ] [[package]] name = "kvm-bindings" -version = "0.10.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4933174d0cc4b77b958578cd45784071cc5ae212c2d78fbd755aaaa6dfa71a" +checksum = "d4b153a59bb3ca930ff8148655b2ef68c34259a623ae08cf2fb9b570b2e45363" dependencies = [ "serde", "vmm-sys-util", - "zerocopy 0.7.35", + "zerocopy 0.8.26", ] [[package]] name = "kvm-ioctls" -version = "0.19.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e013ae7fcd2c6a8f384104d16afe7ea02969301ea2bb2a56e44b011ebc907cab" +checksum = "b702df98508cb63ad89dd9beb9f6409761b30edca10d48e57941d3f11513a006" dependencies = [ "bitflags 2.9.0", "kvm-bindings", @@ -635,7 +636,7 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "micro_http" version = "0.1.0" -source = "git+https://github.com/firecracker-microvm/micro-http?branch=main#4f621532e81ee2ad096a9c9592fdacc40d19de48" +source = "git+https://github.com/firecracker-microvm/micro-http?branch=main#bf5098916006912f8dd35aaa6daa5579c6c297b2" dependencies = [ "libc", "vmm-sys-util", @@ -652,7 +653,7 @@ dependencies = [ "serde", "serde_derive", "vmm-sys-util", - "zerocopy 0.8.24", + "zerocopy 0.8.26", ] [[package]] @@ -830,7 +831,7 @@ checksum = 
"3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ "rand_chacha", "rand_core", - "zerocopy 0.8.24", + "zerocopy 0.8.26", ] [[package]] @@ -865,15 +866,21 @@ dependencies = [ [[package]] name = "remain" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46aef80f842736de545ada6ec65b81ee91504efd6853f4b96de7414c42ae7443" +checksum = "d7ef12e84481ab4006cb942f8682bba28ece7270743e649442027c5db87df126" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + [[package]] name = "ryu" version = "1.0.18" @@ -1105,45 +1112,37 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.15.1" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0f540e3240398cce6128b64ba83fdbdd86129c16a3aa1a3a252efd66eb3d587" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" dependencies = [ "getrandom 0.3.3", + "js-sys", "rand", - "uuid-macro-internal", -] - -[[package]] -name = "uuid-macro-internal" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9521621447c21497fac206ffe6e9f642f977c4f82eeba9201055f64884d9cb01" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "wasm-bindgen", ] [[package]] name = "vfio-bindings" -version = "0.4.0" -source = "git+https://github.com/rust-vmm/vfio?branch=main#b135b8305c2cc8ec333e0cf77a780445cc98dcee" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b565663f62e091ca47db9a674c8c95c9686a000e82970f391a3cacf6470ff060" dependencies = [ "vmm-sys-util", ] [[package]] name = "vfio-ioctls" -version = "0.2.0" -source = "git+https://github.com/rust-vmm/vfio?branch=main#b135b8305c2cc8ec333e0cf77a780445cc98dcee" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61316b5e308faa8ed4a87c4130256f765e46de3442eb2e2e619840ef73456738" dependencies = [ "byteorder", "kvm-bindings", "kvm-ioctls", "libc", "log", - "thiserror 1.0.64", + "thiserror 2.0.12", "vfio-bindings", "vm-memory", "vmm-sys-util", @@ -1152,15 +1151,16 @@ dependencies = [ [[package]] name = "vfio_user" version = "0.1.0" -source = "git+https://github.com/rust-vmm/vfio-user?branch=main#3febcdd3fa2531623865663ca1721e1962ed9979" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed81c5ed8224d468a322e923777ed0615cad433fe61177126098af995f89cecf" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.9.0", "libc", "log", "serde", "serde_derive", "serde_json", - "thiserror 1.0.64", + "thiserror 2.0.12", "vfio-bindings", "vm-memory", "vmm-sys-util", @@ -1168,8 +1168,9 @@ dependencies = [ [[package]] name = "vhost" -version = "0.12.1" -source = "git+https://github.com/rust-vmm/vhost?rev=d983ae0#d983ae07f78663b7d24059667376992460b571a2" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a4dcad85a129d97d5d4b2f3c47a4affdeedd76bdcd02094bcb5d9b76cac2d05" dependencies = [ "bitflags 2.9.0", "libc", @@ -1180,9 +1181,9 @@ dependencies = [ [[package]] name = "virtio-bindings" -version = "0.2.4" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1711e61c00f8cb450bd15368152a1e37a12ef195008ddc7d0f4812f9e2b30a68" +checksum = "804f498a26d5a63be7bbb8bdcd3869c3f286c4c4a17108905276454da0caf8cb" [[package]] name = "virtio-devices" @@ -1219,9 +1220,9 @@ dependencies = [ [[package]] name = "virtio-queue" -version = "0.14.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "872e2f3fbd70a7e6f01689720cce3d5c2c5efe52b484dd07b674246ada0e9a8d" +checksum = "fb0479158f863e59323771a1f684d843962f76960b86fecfec2bfa9c8f0f9180" dependencies = [ "log", "virtio-bindings", @@ -1258,9 +1259,9 @@ source = "git+https://github.com/rust-vmm/vm-fdt?branch=main#ef5bd734f5f66fb0772 [[package]] name = "vm-memory" -version = "0.16.1" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1720e7240cdc739f935456eb77f370d7e9b2a3909204da1e2b47bef1137a013" +checksum = "1fd5e56d48353c5f54ef50bd158a0452fc82f5383da840f7b8efc31695dd3b9d" dependencies = [ "arc-swap", "libc", @@ -1335,14 +1336,14 @@ dependencies = [ "vm-migration", "vm-virtio", "vmm-sys-util", - "zerocopy 0.8.24", + "zerocopy 0.8.26", ] [[package]] name = "vmm-sys-util" -version = "0.12.1" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede" +checksum = "d21f366bf22bfba3e868349978766a965cbe628c323d58e026be80b8357ab789" dependencies = [ "bitflags 1.3.2", "libc", @@ -1367,24 +1368,24 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.93" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.93" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", "syn", @@ -1393,9 +1394,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.93" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1403,9 +1404,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.93" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", @@ -1416,9 +1417,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.93" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "winapi" @@ -1545,11 +1549,11 @@ dependencies = [ [[package]] name = "zerocopy" 
-version = "0.8.24" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ - "zerocopy-derive 0.8.24", + "zerocopy-derive 0.8.26", ] [[package]] @@ -1565,9 +1569,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.24" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index c4536bcfed..27bfc87edb 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -28,13 +28,13 @@ mshv-bindings = "0.5.0" net_util = { path = "../net_util" } seccompiler = "0.5.0" virtio-devices = { path = "../virtio-devices" } -virtio-queue = "0.14.0" +virtio-queue = "0.16.0" vm-device = { path = "../vm-device" } vm-memory = "0.16.0" vm-migration = { path = "../vm-migration" } vm-virtio = { path = "../vm-virtio" } vmm = { path = "../vmm", features = ["guest_debug"] } -vmm-sys-util = "0.12.1" +vmm-sys-util = "0.14.0" # Prevent this from interfering with workspaces [workspace] diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 591a09586f..8bb49e52ca 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -44,7 +44,7 @@ pub use crate::riscv64::{ use crate::vm::{self, InterruptSourceConfig, VmOps}; #[cfg(target_arch = "aarch64")] use crate::{arm64_core_reg_id, offset_of}; -use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType}; +use crate::{cpu, hypervisor, HypervisorType}; #[cfg(target_arch = "riscv64")] use crate::{offset_of, riscv64_reg_id}; // x86_64 dependencies @@ -106,7 +106,7 @@ pub use kvm_ioctls::{Cap, Kvm, VcpuExit}; use thiserror::Error; use vfio_ioctls::VfioDeviceFd; #[cfg(feature = "tdx")] -use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr}; +use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_iowr_nr}; pub use {kvm_bindings, kvm_ioctls}; #[cfg(target_arch = "aarch64")] @@ -119,8 +119,6 @@ const KVM_CAP_SGX_ATTRIBUTE: u32 = 196; #[cfg(target_arch = "x86_64")] use vmm_sys_util::ioctl_io_nr; -#[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))] -use vmm_sys_util::ioctl_ioc_nr; #[cfg(target_arch = "x86_64")] ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a); @@ -693,10 +691,6 @@ impl vm::Vm for KvmVm { /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl. /// fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> { - let mut irq_routing = - vec_with_array_field::(entries.len()); - irq_routing[0].nr = entries.len() as u32; - irq_routing[0].flags = 0; let entries: Vec = entries .iter() .map(|entry| match entry { @@ -706,17 +700,11 @@ impl vm::Vm for KvmVm { }) .collect(); - // SAFETY: irq_routing initialized with entries.len() and now it is being turned into - // entries_slice with entries.len() again. It is guaranteed to be large enough to hold - // everything from entries. 
- unsafe { - let entries_slice: &mut [kvm_irq_routing_entry] = - irq_routing[0].entries.as_mut_slice(entries.len()); - entries_slice.copy_from_slice(&entries); - } + let irq_routing = + kvm_bindings::fam_wrappers::KvmIrqRouting::from_entries(&entries).unwrap(); self.fd - .set_gsi_routing(&irq_routing[0]) + .set_gsi_routing(&irq_routing) .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into())) } @@ -2956,11 +2944,15 @@ impl KvmVcpu { /// fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> { let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into(); - self.fd - .lock() - .unwrap() - .set_xsave(&xsave) - .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into())) + // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct + // when calling the kvm-ioctl library function. + unsafe { + self.fd + .lock() + .unwrap() + .set_xsave(&xsave) + .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into())) + } } #[cfg(target_arch = "x86_64")] diff --git a/virtio-devices/Cargo.toml b/virtio-devices/Cargo.toml index fd86b5e3cf..5056fd1acf 100644 --- a/virtio-devices/Cargo.toml +++ b/virtio-devices/Cargo.toml @@ -36,7 +36,7 @@ vhost = { workspace = true, features = [ "vhost-user-frontend", "vhost-vdpa", ] } -virtio-bindings = { workspace = true, features = ["virtio-v5_0_0"] } +virtio-bindings = { workspace = true } virtio-queue = { workspace = true } vm-allocator = { path = "../vm-allocator" } vm-device = { path = "../vm-device" } diff --git a/virtio-devices/src/vdpa.rs b/virtio-devices/src/vdpa.rs index 7a8952f211..decd6ede64 100644 --- a/virtio-devices/src/vdpa.rs +++ b/virtio-devices/src/vdpa.rs @@ -16,7 +16,8 @@ use vhost::vhost_kern::vdpa::VhostKernVdpa; use vhost::vhost_kern::vhost_binding::VHOST_BACKEND_F_SUSPEND; use vhost::vhost_kern::VhostKernFeatures; use vhost::{VhostBackend, VringConfigData}; -use virtio_queue::{Descriptor, Queue, QueueT}; +use virtio_queue::desc::RawDescriptor; +use virtio_queue::{Queue, QueueT}; use vm_device::dma_mapping::ExternalDmaMapping; use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic}; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; @@ -245,7 +246,7 @@ impl Vdpa { flags: 0u32, desc_table_addr: queue.desc_table().translate_gpa( self.common.access_platform.as_ref(), - queue_size as usize * std::mem::size_of::(), + queue_size as usize * std::mem::size_of::(), ), used_ring_addr: queue.used_ring().translate_gpa( self.common.access_platform.as_ref(), diff --git a/virtio-devices/src/vhost_user/vu_common_ctrl.rs b/virtio-devices/src/vhost_user/vu_common_ctrl.rs index 60ee6b32dc..e1204fbd7d 100644 --- a/virtio-devices/src/vhost_user/vu_common_ctrl.rs +++ b/virtio-devices/src/vhost_user/vu_common_ctrl.rs @@ -18,7 +18,8 @@ use vhost::vhost_user::{ Frontend, FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler, }; use vhost::{VhostBackend, VhostUserDirtyLogRegion, VhostUserMemoryRegionInfo, VringConfigData}; -use virtio_queue::{Descriptor, Queue, QueueT}; +use virtio_queue::desc::RawDescriptor; +use virtio_queue::{Queue, QueueT}; use vm_memory::{ Address, Error as MmapError, FileOffset, GuestAddress, GuestMemory, GuestMemoryRegion, }; @@ -212,7 +213,7 @@ impl VhostUserHandle { desc_table_addr: get_host_address_range( mem, GuestAddress(queue.desc_table()), - actual_size * std::mem::size_of::(), + actual_size * std::mem::size_of::(), ) .ok_or(Error::DescriptorTableAddress)? 
as u64, // The used ring is {flags: u16; idx: u16; virtq_used_elem [{id: u16, len: u16}; actual_size]}, diff --git a/vm-virtio/src/queue.rs b/vm-virtio/src/queue.rs index 4e55cc4b5d..c33f6e5996 100644 --- a/vm-virtio/src/queue.rs +++ b/vm-virtio/src/queue.rs @@ -12,7 +12,8 @@ pub mod testing { use std::marker::PhantomData; use std::mem; - use virtio_queue::{Queue, QueueT, VirtqUsedElem}; + use virtio_queue::desc::split::VirtqUsedElem; + use virtio_queue::{Queue, QueueT}; use vm_memory::bitmap::AtomicBitmap; use vm_memory::{Address, Bytes, GuestAddress, GuestUsize}; From ea32b67098ffd57b2afef9163e98c83d47dba805 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 1 Jul 2025 03:29:54 +0000 Subject: [PATCH 003/294] build: Bump proc-macro-crate from 3.2.0 to 3.3.0 Bumps [proc-macro-crate](https://github.com/bkchr/proc-macro-crate) from 3.2.0 to 3.3.0. - [Release notes](https://github.com/bkchr/proc-macro-crate/releases) - [Commits](https://github.com/bkchr/proc-macro-crate/compare/v3.2.0...v3.3.0) --- updated-dependencies: - dependency-name: proc-macro-crate dependency-version: 3.3.0 dependency-type: indirect update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index afd186fa63..cb55afc8e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1637,9 +1637,9 @@ dependencies = [ [[package]] name = "proc-macro-crate" -version = "3.2.0" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" dependencies = [ "toml_edit", ] From 3d5b4d0b0c6b73e3ba7eb2727f6e01e5e64f01c3 Mon Sep 17 00:00:00 2001 From: ninollei Date: Wed, 4 Jun 2025 13:43:37 +0800 Subject: [PATCH 004/294] vmm: acpi: Use correct table name in error message Fix a copy-paste error using the wrong table name in the assertion Signed-off-by: ninollei --- vmm/src/acpi.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vmm/src/acpi.rs b/vmm/src/acpi.rs index d45f6a7196..e59b63f58b 100644 --- a/vmm/src/acpi.rs +++ b/vmm/src/acpi.rs @@ -780,7 +780,7 @@ pub fn create_acpi_tables( let slit_offset = srat_offset.checked_add(srat.len() as u64).unwrap(); guest_mem .write_slice(slit.as_slice(), slit_offset) - .expect("Error writing SRAT table"); + .expect("Error writing SLIT table"); tables.push(slit_offset.0); prev_tbl_len = slit.len() as u64; From f9c134471ac88ea307d0baae8a8d1434b56b81ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20G=C3=BCntner?= Date: Mon, 7 Jul 2025 11:05:46 +0200 Subject: [PATCH 005/294] vmm: warn about deprecation of default IP address + mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue: #7083 Signed-off-by: Maximilian Güntner --- vmm/src/vm_config.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index a2c5b996b4..3e6cf447da 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -350,10 +350,12 @@ pub fn default_netconfig_tap() -> Option { } pub fn default_netconfig_ip() -> IpAddr { + warn!("Deprecation warning: No IP address provided. A default IP address is assigned. 
This behavior will be deprecated soon."); IpAddr::V4(Ipv4Addr::new(192, 168, 249, 1)) } pub fn default_netconfig_mask() -> IpAddr { + warn!("Deprecation warning: No network mask provided. A default network mask is assigned. This behavior will be deprecated soon."); IpAddr::V4(Ipv4Addr::new(255, 255, 255, 0)) } From 9d4408ba7613d5c8da4e2a4db3bf0a794ed42d17 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Thu, 10 Jul 2025 16:26:38 +0200 Subject: [PATCH 006/294] vmm: add directory path to error message Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- vmm/src/migration.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vmm/src/migration.rs b/vmm/src/migration.rs index 2752e82e5b..d93b028055 100644 --- a/vmm/src/migration.rs +++ b/vmm/src/migration.rs @@ -27,7 +27,7 @@ pub fn url_to_path(url: &str) -> std::result::Result { if !path.is_dir() { return Err(MigratableError::MigrateSend(anyhow!( - "Destination is not a directory" + "Destination is not a directory: {path:?}" ))); } From 6ba949d7415a9d7d9a5272563e0ce122a6ac48e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20G=C3=BCntner?= Date: Wed, 9 Jul 2025 12:00:08 +0200 Subject: [PATCH 007/294] build: consolidate env_logger to workspace, update to 0.11.8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Güntner --- Cargo.lock | 51 ++++++++++++++++++++++++++++++------- Cargo.toml | 1 + hypervisor/Cargo.toml | 2 +- vhost_user_block/Cargo.toml | 2 +- vhost_user_net/Cargo.toml | 2 +- 5 files changed, 46 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cb55afc8e1..a0ce308144 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -621,14 +621,14 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.3" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" dependencies = [ "anstream", "anstyle", "env_filter", - "humantime", + "jiff", "log", ] @@ -900,12 +900,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - [[package]] name = "hypervisor" version = "0.1.0" @@ -1030,6 +1024,30 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jiff" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "js-sys" version = "0.3.77" @@ -1626,6 +1644,21 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + [[package]] name = "ppv-lite86" version = "0.2.20" diff --git a/Cargo.toml b/Cargo.toml index 0cc8e53a9b..5f21b99a0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -129,6 +129,7 @@ igvm_defs = { git = "https://github.com/microsoft/igvm", branch = "main" } serde_json = "1.0.120" # other crates +env_logger = "0.11.8" thiserror = "2.0.12" uuid = { version = "1.17.0" } zerocopy = { version = "0.8.26", default-features = false } diff --git a/hypervisor/Cargo.toml b/hypervisor/Cargo.toml index fed24b6862..60f53c7d4d 100644 --- a/hypervisor/Cargo.toml +++ b/hypervisor/Cargo.toml @@ -62,4 +62,4 @@ optional = true version = "1.21.0" [dev-dependencies] -env_logger = "0.11.3" +env_logger = { workspace = true } diff --git a/vhost_user_block/Cargo.toml b/vhost_user_block/Cargo.toml index 4c63cfa4d0..d9aa7e105d 100644 --- a/vhost_user_block/Cargo.toml +++ b/vhost_user_block/Cargo.toml @@ -8,7 +8,7 @@ version = "0.1.0" [dependencies] block = { path = "../block" } clap = { version = "4.5.13", features = ["cargo", "wrap_help"] } -env_logger = "0.11.3" +env_logger = { workspace = true } epoll = "4.3.3" libc = "0.2.167" log = "0.4.22" diff --git a/vhost_user_net/Cargo.toml b/vhost_user_net/Cargo.toml index d36763b790..515287e2cc 100644 --- a/vhost_user_net/Cargo.toml +++ b/vhost_user_net/Cargo.toml @@ -7,7 +7,7 @@ version = "0.1.0" [dependencies] clap = { version = "4.5.13", features = ["cargo", "wrap_help"] } -env_logger = "0.11.3" +env_logger = { workspace = true } epoll = "4.3.3" libc = "0.2.167" log = "0.4.22" From 19dc733267fecd378e28bff3a0946839e1c15a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20G=C3=BCntner?= Date: Wed, 9 Jul 2025 12:03:38 +0200 Subject: [PATCH 008/294] ch-remote: add env_logger, log messages to stderr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until now all messages generated using `log::level!` (e.g., `warn!`) have not been printed as `ch-remote` did not register a logger. Furthermore, replace all `eprintln!` with `error!` to align formatting for consistency. 
Signed-off-by: Maximilian Güntner --- Cargo.lock | 1 + Cargo.toml | 1 + src/bin/ch-remote.rs | 12 +++++++----- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a0ce308144..1dfcd3afef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -409,6 +409,7 @@ dependencies = [ "clap", "dhat", "dirs", + "env_logger", "epoll", "event_monitor", "hypervisor", diff --git a/Cargo.toml b/Cargo.toml index 5f21b99a0f..a913cb6002 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ anyhow = "1.0.94" api_client = { path = "api_client" } clap = { version = "4.5.13", features = ["string"] } dhat = { version = "0.3.3", optional = true } +env_logger = { workspace = true } epoll = "4.3.3" event_monitor = { path = "event_monitor" } hypervisor = { path = "hypervisor" } diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs index 9d8c4f68b4..4278af6175 100644 --- a/src/bin/ch-remote.rs +++ b/src/bin/ch-remote.rs @@ -17,6 +17,7 @@ use api_client::{ Error as ApiClientError, }; use clap::{Arg, ArgAction, ArgMatches, Command}; +use log::error; use option_parser::{ByteSized, ByteSizedParseError}; use thiserror::Error; use vmm::config::RestoreConfig; @@ -1071,6 +1072,7 @@ fn get_cli_commands_sorted() -> Box<[Command]> { } fn main() { + env_logger::init(); let app = Command::new("ch-remote") .author(env!("CARGO_PKG_AUTHORS")) .version(env!("BUILD_VERSION")) @@ -1092,7 +1094,7 @@ fn main() { #[cfg(not(feature = "dbus_api"))] (Some(api_sock),) => TargetApi::HttpApi( UnixStream::connect(api_sock).unwrap_or_else(|e| { - eprintln!("Error opening HTTP socket: {e}"); + error!("Error opening HTTP socket: {e}"); process::exit(1) }), PhantomData, @@ -1100,7 +1102,7 @@ fn main() { #[cfg(feature = "dbus_api")] (Some(api_sock), None, None) => TargetApi::HttpApi( UnixStream::connect(api_sock).unwrap_or_else(|e| { - eprintln!("Error opening HTTP socket: {e}"); + error!("Error opening HTTP socket: {e}"); process::exit(1) }), PhantomData, @@ -1114,19 +1116,19 @@ fn main() { ) .map_err(Error::DBusApiClient) .unwrap_or_else(|e| { - eprintln!("Error creating D-Bus proxy: {e}"); + error!("Error creating D-Bus proxy: {e}"); process::exit(1) }), ), #[cfg(feature = "dbus_api")] (Some(_), Some(_) | None, Some(_) | None) => { - println!( + error!( "`api-socket` and (dbus-service-name or dbus-object-path) are mutually exclusive" ); process::exit(1); } _ => { - println!("Please either provide the api-socket option or dbus-service-name and dbus-object-path options"); + error!("Please either provide the api-socket option or dbus-service-name and dbus-object-path options"); process::exit(1); } }; From 50b33db718094421c085645941406b13a49eaf09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20G=C3=BCntner?= Date: Tue, 8 Jul 2025 22:29:29 +0200 Subject: [PATCH 009/294] vmm: replace eprintln with log::error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify log formatting and printing as `eprintln!` and `log::error!` would be used alongside each other. When using e.g. `env_logger` lines printed with `eprintln!` would lack formatting / colors. Currently only relevant in `ch-remote` + `cli_print_error_chain`. Note that the replaced messages now also end up in the logfile of `cloud-hypervisor` when configured and not any longer in stderr. 
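
For context, cli_print_error_chain walks the chain by following
Error::source() links with std::iter::successors(); a standalone sketch
of that idiom (the print_chain helper below is hypothetical, not part of
this patch):

    use std::error::Error;

    fn print_chain(top: &(dyn Error + 'static)) {
        // Walk from the top-level error down to the root cause,
        // logging one line per level through the registered logger.
        std::iter::successors(Some(top), |e| (*e).source())
            .enumerate()
            .for_each(|(level, e)| log::error!("  {level}: {e}"));
    }
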
Signed-off-by: Maximilian Güntner --- src/lib.rs | 16 +++++++++------- src/main.rs | 8 ++++---- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1596a13f47..355f0a9cfe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,8 @@ use std::error::Error; +use log::error; + /// Prints a chain of errors to the user in a consistent manner. /// The user will see a clear chain of errors, followed by debug output /// for opening issues. @@ -19,10 +21,10 @@ pub fn cli_print_error_chain<'a>( ) { eprint!("Error: {component} exited with the following "); if top_error.source().is_none() { - eprintln!("error:"); - eprintln!(" {top_error}"); + error!("error:"); + error!(" {top_error}"); } else { - eprintln!("chain of errors:"); + error!("chain of errors:"); std::iter::successors(Some(top_error), |sub_error| { // Dereference necessary to mitigate rustc compiler bug. // See @@ -32,13 +34,13 @@ pub fn cli_print_error_chain<'a>( .for_each(|(level, error)| { // Special case: handling of HTTP Server responses in ch-remote if let Some(message) = display_modifier(level, 2, error) { - eprintln!("{message}"); + error!("{message}"); } else { - eprintln!(" {level}: {error}"); + error!(" {level}: {error}"); } }); } - eprintln!(); - eprintln!("Debug Info: {top_error:?}"); + error!(""); + error!("Debug Info: {top_error:?}"); } diff --git a/src/main.rs b/src/main.rs index 4a0fbe91f9..101da1706b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,7 +15,7 @@ use std::{env, io}; use clap::{Arg, ArgAction, ArgGroup, ArgMatches, Command}; use event_monitor::event; use libc::EFD_NONBLOCK; -use log::{warn, LevelFilter}; +use log::{error, warn, LevelFilter}; use option_parser::OptionParser; use seccompiler::SeccompAction; use signal_hook::consts::SIGSYS; @@ -561,7 +561,7 @@ fn start_vmm(cmd_arguments: ArgMatches) -> Result, Error> { signal_hook::low_level::emulate_default_handler(SIGSYS).unwrap(); }) } - .map_err(|e| eprintln!("Error adding SIGSYS signal handler: {e}")) + .map_err(|e| error!("Error adding SIGSYS signal handler: {e}")) .ok(); } @@ -575,13 +575,13 @@ fn start_vmm(cmd_arguments: ArgMatches) -> Result, Error> { // dedicated signal handling thread we'll start in a bit. for sig in &vmm::vm::Vm::HANDLED_SIGNALS { if let Err(e) = block_signal(*sig) { - eprintln!("Error blocking signals: {e}"); + error!("Error blocking signals: {e}"); } } for sig in &vmm::Vmm::HANDLED_SIGNALS { if let Err(e) = block_signal(*sig) { - eprintln!("Error blocking signals: {e}"); + error!("Error blocking signals: {e}"); } } From 072f06ff4c8ecf0a473d15237e942f833629565d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20G=C3=BCntner?= Date: Tue, 8 Jul 2025 22:36:55 +0200 Subject: [PATCH 010/294] misc: vhost_user_net: replace eprintln with log::error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Other lines are already logged using `log::error!` and `env_logger` is initialized before calling `start_net_backend` in `main()`. 
Signed-off-by: Maximilian Güntner --- vhost_user_net/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vhost_user_net/src/lib.rs b/vhost_user_net/src/lib.rs index 433f0c7a29..56b54ad6d8 100644 --- a/vhost_user_net/src/lib.rs +++ b/vhost_user_net/src/lib.rs @@ -348,7 +348,7 @@ pub fn start_net_backend(backend_command: &str) { let backend_config = match VhostUserNetBackendConfig::parse(backend_command) { Ok(config) => config, Err(e) => { - eprintln!("Failed parsing parameters {e:?}"); + error!("Failed parsing parameters {e:?}"); process::exit(1); } }; From 87e74719ec5dd0548bfb5003f6656876eca16712 Mon Sep 17 00:00:00 2001 From: Ruoqing He Date: Wed, 9 Jul 2025 15:18:17 +0000 Subject: [PATCH 011/294] hypervisor: riscv64: Use offset_of from std::mem `std::mem::offset_of` supports calculating offset of nested structures, let's use implementation provided by std instead of manual implementation. Signed-off-by: Ruoqing He --- hypervisor/src/kvm/mod.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 8bb49e52ca..c7888cb580 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -14,6 +14,8 @@ use std::any::Any; use std::collections::HashMap; #[cfg(target_arch = "x86_64")] use std::fs::File; +#[cfg(target_arch = "riscv64")] +use std::mem::offset_of; #[cfg(target_arch = "x86_64")] use std::os::unix::io::AsRawFd; #[cfg(feature = "tdx")] @@ -41,12 +43,12 @@ pub use crate::riscv64::{ aia::AiaImsicsState as AiaState, check_required_kvm_extensions, is_non_core_register, VcpuKvmState, }; +#[cfg(target_arch = "riscv64")] +use crate::riscv64_reg_id; use crate::vm::{self, InterruptSourceConfig, VmOps}; #[cfg(target_arch = "aarch64")] use crate::{arm64_core_reg_id, offset_of}; use crate::{cpu, hypervisor, HypervisorType}; -#[cfg(target_arch = "riscv64")] -use crate::{offset_of, riscv64_reg_id}; // x86_64 dependencies #[cfg(target_arch = "x86_64")] pub mod x86_64; @@ -99,7 +101,7 @@ use kvm_bindings::{ KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, }; #[cfg(target_arch = "riscv64")] -use kvm_bindings::{kvm_riscv_core, user_regs_struct, KVM_REG_RISCV_CORE}; +use kvm_bindings::{kvm_riscv_core, KVM_REG_RISCV_CORE}; #[cfg(feature = "tdx")] use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO}; pub use kvm_ioctls::{Cap, Kvm, VcpuExit}; @@ -1495,7 +1497,7 @@ impl cpu::Vcpu for KvmVcpu { state.mode = u64::from_le_bytes(bytes); }; ($reg_name:ident) => { - let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, $reg_name); + let off = offset_of!(kvm_riscv_core, regs.$reg_name); let mut bytes = [0_u8; 8]; self.fd .lock() @@ -1703,7 +1705,7 @@ impl cpu::Vcpu for KvmVcpu { .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; }; ($reg_name:ident) => { - let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, $reg_name); + let off = offset_of!(kvm_riscv_core, regs.$reg_name); self.fd .lock() .unwrap() @@ -2328,7 +2330,7 @@ impl cpu::Vcpu for KvmVcpu { /// fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { // Setting the A0 () to the hartid of this CPU. - let a0 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0); + let a0 = offset_of!(kvm_riscv_core, regs.a0); self.fd .lock() .unwrap() @@ -2339,7 +2341,7 @@ impl cpu::Vcpu for KvmVcpu { .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; // Setting the PC (Processor Counter) to the current program address (kernel address). 
- let pc = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc); + let pc = offset_of!(kvm_riscv_core, regs.pc); self.fd .lock() .unwrap() @@ -2352,7 +2354,7 @@ impl cpu::Vcpu for KvmVcpu { // Last mandatory thing to set -> the address pointing to the FDT (also called DTB). // "The device tree blob (dtb) must be placed on an 8-byte boundary and must // not exceed 64 kilobytes in size." -> https://www.kernel.org/doc/Documentation/arch/riscv/boot.txt. - let a1 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a1); + let a1 = offset_of!(kvm_riscv_core, regs.a1); self.fd .lock() .unwrap() @@ -3019,7 +3021,7 @@ mod tests { let vcpu0 = vm.create_vcpu(0, None).unwrap(); let core_regs = StandardRegisters::from(kvm_riscv_core { - regs: user_regs_struct { + regs: kvm_bindings::user_regs_struct { pc: 0x00, ra: 0x01, sp: 0x02, From aa6fefa80f041907a5d264eed8cbca7d1426d6c1 Mon Sep 17 00:00:00 2001 From: Ruoqing He Date: Wed, 9 Jul 2025 15:20:07 +0000 Subject: [PATCH 012/294] hypervisor: riscv64: Remove manually implemented offset_of Manually implemented `_offset_of` and `offset_of` in `arch/riscv64/mod.rs` are not used now, remove them. Signed-off-by: Ruoqing He --- hypervisor/src/kvm/riscv64/mod.rs | 35 ------------------------------- 1 file changed, 35 deletions(-) diff --git a/hypervisor/src/kvm/riscv64/mod.rs b/hypervisor/src/kvm/riscv64/mod.rs index 94017d827b..c589b488a3 100644 --- a/hypervisor/src/kvm/riscv64/mod.rs +++ b/hypervisor/src/kvm/riscv64/mod.rs @@ -13,41 +13,6 @@ use serde::{Deserialize, Serialize}; use crate::kvm::{KvmError, KvmResult}; -// This macro gets the offset of a structure (i.e `str`) member (i.e `field`) without having -// an instance of that structure. -#[macro_export] -macro_rules! _offset_of { - ($str:ty, $field:ident) => {{ - let tmp: std::mem::MaybeUninit<$str> = std::mem::MaybeUninit::uninit(); - let base = tmp.as_ptr(); - - // Avoid warnings when nesting `unsafe` blocks. - #[allow(unused_unsafe)] - // SAFETY: The pointer is valid and aligned, just not initialised. Using `addr_of` ensures - // that we don't actually read from `base` (which would be UB) nor create an intermediate - // reference. - let member = unsafe { core::ptr::addr_of!((*base).$field) } as *const u8; - - // Avoid warnings when nesting `unsafe` blocks. - #[allow(unused_unsafe)] - // SAFETY: The two pointers are within the same allocated object `tmp`. All requirements - // from offset_from are upheld. - unsafe { - member.offset_from(base as *const u8) as usize - } - }}; -} - -#[macro_export] -macro_rules! offset_of { - ($reg_struct:ty, $field:ident) => { - $crate::_offset_of!($reg_struct, $field) - }; - ($outer_reg_struct:ty, $outer_field:ident, $($inner_reg_struct:ty, $inner_field:ident), +) => { - $crate::_offset_of!($outer_reg_struct, $outer_field) + offset_of!($($inner_reg_struct, $inner_field), +) - }; -} - // Following are macros that help with getting the ID of a riscv64 register, including config registers, core registers and timer registers. // The register of core registers are wrapped in the `user_regs_struct` structure. See: // https://elixir.bootlin.com/linux/v6.10/source/arch/riscv/include/uapi/asm/kvm.h#L62 From 008f259aff8f6fc044f5c6de5c6162546eb488cb Mon Sep 17 00:00:00 2001 From: Ruoqing He Date: Thu, 10 Jul 2025 01:54:23 +0000 Subject: [PATCH 013/294] hypervisor: aarch64: Use offset_of from std::mem `std::mem::offset_of` is stabilized since Rust 1.77, let's use implementation provided by std instead of manual implementation. 
Signed-off-by: Ruoqing He --- hypervisor/src/kvm/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index c7888cb580..fa0ed209df 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -14,7 +14,7 @@ use std::any::Any; use std::collections::HashMap; #[cfg(target_arch = "x86_64")] use std::fs::File; -#[cfg(target_arch = "riscv64")] +#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use std::mem::offset_of; #[cfg(target_arch = "x86_64")] use std::os::unix::io::AsRawFd; @@ -36,6 +36,8 @@ pub use crate::aarch64::{check_required_kvm_extensions, is_system_register, Vcpu use crate::arch::aarch64::gic::{Vgic, VgicConfig}; #[cfg(target_arch = "riscv64")] use crate::arch::riscv64::aia::{Vaia, VaiaConfig}; +#[cfg(target_arch = "aarch64")] +use crate::arm64_core_reg_id; #[cfg(target_arch = "riscv64")] use crate::riscv64::aia::KvmAiaImsics; #[cfg(target_arch = "riscv64")] @@ -46,8 +48,6 @@ pub use crate::riscv64::{ #[cfg(target_arch = "riscv64")] use crate::riscv64_reg_id; use crate::vm::{self, InterruptSourceConfig, VmOps}; -#[cfg(target_arch = "aarch64")] -use crate::{arm64_core_reg_id, offset_of}; use crate::{cpu, hypervisor, HypervisorType}; // x86_64 dependencies #[cfg(target_arch = "x86_64")] From 07cc1f654518aafb51563b2bcbdf6684a6be37b1 Mon Sep 17 00:00:00 2001 From: Ruoqing He Date: Thu, 10 Jul 2025 01:56:26 +0000 Subject: [PATCH 014/294] hypervisor: aarch64: Remove manually implemented offset_of Manually implemented `offset_of` in `arch/aarch64/mod.rs` is not used now, remove it. Signed-off-by: Ruoqing He --- hypervisor/src/kvm/aarch64/mod.rs | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/hypervisor/src/kvm/aarch64/mod.rs b/hypervisor/src/kvm/aarch64/mod.rs index 0bef5e07d3..20fef72444 100644 --- a/hypervisor/src/kvm/aarch64/mod.rs +++ b/hypervisor/src/kvm/aarch64/mod.rs @@ -19,31 +19,6 @@ use serde::{Deserialize, Serialize}; use crate::kvm::{KvmError, KvmResult}; -// This macro gets the offset of a structure (i.e `str`) member (i.e `field`) without having -// an instance of that structure. -#[macro_export] -macro_rules! offset_of { - ($str:ty, $field:ident) => {{ - let tmp: std::mem::MaybeUninit<$str> = std::mem::MaybeUninit::uninit(); - let base = tmp.as_ptr(); - - // Avoid warnings when nesting `unsafe` blocks. - #[allow(unused_unsafe)] - // SAFETY: The pointer is valid and aligned, just not initialised. Using `addr_of` ensures - // that we don't actually read from `base` (which would be UB) nor create an intermediate - // reference. - let member = unsafe { core::ptr::addr_of!((*base).$field) } as *const u8; - - // Avoid warnings when nesting `unsafe` blocks. - #[allow(unused_unsafe)] - // SAFETY: The two pointers are within the same allocated object `tmp`. All requirements - // from offset_from are upheld. - unsafe { - member.offset_from(base as *const u8) as usize - } - }}; -} - // Following are macros that help with getting the ID of a aarch64 core register. // The core register are represented by the user_pt_regs structure. Look for it in // arch/arm64/include/uapi/asm/ptrace.h. From 6da5c32fd9dd6cc175a811e4b77f7e95fc633eea Mon Sep 17 00:00:00 2001 From: Ruoqing He Date: Thu, 10 Jul 2025 02:16:44 +0000 Subject: [PATCH 015/294] hypervisor: aarch64: Use offset_of for nested fields `std::mem::offset_of` could be used for calculating nested fields, use this feature to shorten aarch64 reg offset calculation. 
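
A minimal sketch of the nested-field form relied on here, using toy
structs rather than the real kvm-bindings layouts and assuming a
toolchain on which nested paths in offset_of! are stable:

    use std::mem::offset_of;

    #[repr(C)]
    struct UserPtRegs {
        regs: [u64; 31],
        sp: u64,
        pc: u64,
        pstate: u64,
    }

    #[repr(C)]
    struct KvmRegs {
        regs: UserPtRegs,
        fp_state: u64,
    }

    fn main() {
        // One nested path replaces summing two offsets by hand:
        // offset_of!(KvmRegs, regs) + offset_of!(UserPtRegs, pc).
        let off = offset_of!(KvmRegs, regs.pc);
        assert_eq!(off, offset_of!(KvmRegs, regs) + offset_of!(UserPtRegs, pc));
        println!("pc lives at byte offset {off}");
    }
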
Signed-off-by: Ruoqing He --- hypervisor/src/kvm/mod.rs | 28 +++++++++++++--------------- vmm/src/cpu.rs | 6 +++--- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index fa0ed209df..8b8351b6de 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -95,10 +95,10 @@ pub use kvm_bindings::{ }; #[cfg(target_arch = "aarch64")] use kvm_bindings::{ - kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64, - KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK, - KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK, - KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, + kvm_regs, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM64_SYSREG, + KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK, KVM_REG_ARM64_SYSREG_OP0_MASK, + KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK, KVM_REG_ARM_CORE, + KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, }; #[cfg(target_arch = "riscv64")] use kvm_bindings::{kvm_riscv_core, KVM_REG_RISCV_CORE}; @@ -1440,7 +1440,7 @@ impl cpu::Vcpu for KvmVcpu { // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel: // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53 - let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs); + let mut off = offset_of!(kvm_regs, fp_regs.vregs); for i in 0..32 { let mut bytes = [0_u8; 16]; self.fd @@ -1453,7 +1453,7 @@ impl cpu::Vcpu for KvmVcpu { } // Floating-point Status Register - let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr); + let off = offset_of!(kvm_regs, fp_regs.fpsr); let mut bytes = [0_u8; 4]; self.fd .lock() @@ -1463,7 +1463,7 @@ impl cpu::Vcpu for KvmVcpu { state.fp_regs.fpsr = u32::from_le_bytes(bytes); // Floating-point Control Register - let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr); + let off = offset_of!(kvm_regs, fp_regs.fpcr); let mut bytes = [0_u8; 4]; self.fd .lock() @@ -1644,7 +1644,7 @@ impl cpu::Vcpu for KvmVcpu { off += std::mem::size_of::(); } - let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs); + let mut off = offset_of!(kvm_regs, fp_regs.vregs); for i in 0..32 { self.fd .lock() @@ -1657,7 +1657,7 @@ impl cpu::Vcpu for KvmVcpu { off += mem::size_of::(); } - let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr); + let off = offset_of!(kvm_regs, fp_regs.fpsr); self.fd .lock() .unwrap() @@ -1667,7 +1667,7 @@ impl cpu::Vcpu for KvmVcpu { ) .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; - let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr); + let off = offset_of!(kvm_regs, fp_regs.fpcr); self.fd .lock() .unwrap() @@ -2281,10 +2281,8 @@ impl cpu::Vcpu for KvmVcpu { /// #[cfg(target_arch = "aarch64")] fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { - let kreg_off = offset_of!(kvm_regs, regs); - // Get the register index of the PSTATE (Processor State) register. - let pstate = offset_of!(user_pt_regs, pstate) + kreg_off; + let pstate = offset_of!(kvm_regs, regs.pstate); self.fd .lock() .unwrap() @@ -2297,7 +2295,7 @@ impl cpu::Vcpu for KvmVcpu { // Other vCPUs are powered off initially awaiting PSCI wakeup. 
if cpu_id == 0 { // Setting the PC (Processor Counter) to the current program address (kernel address). - let pc = offset_of!(user_pt_regs, pc) + kreg_off; + let pc = offset_of!(kvm_regs, regs.pc); self.fd .lock() .unwrap() @@ -2311,7 +2309,7 @@ impl cpu::Vcpu for KvmVcpu { // "The device tree blob (dtb) must be placed on an 8-byte boundary and must // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt. // We are choosing to place it the end of DRAM. See `get_fdt_addr`. - let regs0 = offset_of!(user_pt_regs, regs) + kreg_off; + let regs0 = offset_of!(kvm_regs, regs.regs); self.fd .lock() .unwrap() diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index e26946538e..d8278067a5 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -2963,19 +2963,19 @@ mod tests { #[cfg(test)] mod tests { #[cfg(feature = "kvm")] - use std::mem; + use std::{mem, mem::offset_of}; use arch::layout; use hypervisor::arch::aarch64::regs::MPIDR_EL1; #[cfg(feature = "kvm")] + use hypervisor::arm64_core_reg_id; + #[cfg(feature = "kvm")] use hypervisor::kvm::aarch64::is_system_register; #[cfg(feature = "kvm")] use hypervisor::kvm::kvm_bindings::{ user_pt_regs, KVM_REG_ARM64, KVM_REG_ARM64_SYSREG, KVM_REG_ARM_CORE, KVM_REG_SIZE_U64, }; use hypervisor::HypervisorCpuError; - #[cfg(feature = "kvm")] - use hypervisor::{arm64_core_reg_id, offset_of}; #[test] fn test_setup_regs() { From 96528f84f95cf9e6006b2192f6f693da04e5418c Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Thu, 10 Jul 2025 19:00:26 +0000 Subject: [PATCH 016/294] build: Bump gdbstub from 0.7.1 to 0.7.6 Bumps [gdbstub](https://github.com/daniel5151/gdbstub) from 0.7.1 to 0.7.6. - [Release notes](https://github.com/daniel5151/gdbstub/releases) - [Changelog](https://github.com/daniel5151/gdbstub/blob/master/CHANGELOG.md) - [Commits](https://github.com/daniel5151/gdbstub/compare/0.7.1...0.7.6) --- updated-dependencies: - dependency-name: gdbstub dependency-version: 0.7.6 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Signed-off-by: Bo Chen --- Cargo.lock | 4 ++-- fuzz/Cargo.lock | 4 ++-- vmm/Cargo.toml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1dfcd3afef..7acf24989b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -824,9 +824,9 @@ dependencies = [ [[package]] name = "gdbstub" -version = "0.7.1" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6341b3480afbb34eaefc7f92713bc92f2d83e338aaa1c44192f9c2956f4a4903" +checksum = "71d66e32caf5dd59f561be0143e413e01d651bd8498eb9aa0be8c482c81c8d31" dependencies = [ "bitflags 2.9.0", "cfg-if", diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 85596958ce..4ea15fff34 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -403,9 +403,9 @@ checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "gdbstub" -version = "0.7.2" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbcc892208d6998fb57e7c3e05883def66f8130924bba066beb0cfe71566a9f6" +checksum = "71d66e32caf5dd59f561be0143e413e01d651bd8498eb9aa0be8c482c81c8d31" dependencies = [ "bitflags 2.9.0", "cfg-if", diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 342260c0da..813d74f351 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -40,7 +40,7 @@ epoll = "4.3.3" event_monitor = { path = "../event_monitor" } flume = "0.11.1" futures = { version = "0.3.31", optional = true } -gdbstub = { version = "0.7.1", optional = true } +gdbstub = { version = "0.7.6", optional = true } gdbstub_arch = { version = "0.3.0", optional = true } hex = { version = "0.4.3", optional = true } hypervisor = { path = "../hypervisor" } From 0659eaeba18a03ca32244aa9eac585df2c682786 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 10 Jul 2025 23:56:32 +0000 Subject: [PATCH 017/294] build: Bump async-signal from 0.2.10 to 0.2.11 Bumps [async-signal](https://github.com/smol-rs/async-signal) from 0.2.10 to 0.2.11. - [Release notes](https://github.com/smol-rs/async-signal/releases) - [Changelog](https://github.com/smol-rs/async-signal/blob/master/CHANGELOG.md) - [Commits](https://github.com/smol-rs/async-signal/compare/v0.2.10...v0.2.11) --- updated-dependencies: - dependency-name: async-signal dependency-version: 0.2.11 dependency-type: indirect update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7acf24989b..2324d4e720 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -222,9 +222,9 @@ dependencies = [ [[package]] name = "async-signal" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "637e00349800c0bdf8bfc21ebbc0b6524abea702b0da4168ac00d070d0c0b9f3" +checksum = "d7605a4e50d4b06df3898d5a70bf5fde51ed9059b0434b73105193bc27acce0d" dependencies = [ "async-io", "async-lock", @@ -232,7 +232,7 @@ dependencies = [ "cfg-if", "futures-core", "futures-io", - "rustix 0.38.44", + "rustix 1.0.7", "signal-hook-registry", "slab", "windows-sys 0.59.0", From b268e88ba31b172282f2c3556099a667622d7b24 Mon Sep 17 00:00:00 2001 From: Muminul Islam Date: Fri, 11 Jul 2025 10:20:10 -0700 Subject: [PATCH 018/294] virtio-devices: remove unnecessary parentheses Cargo fuzz build report an warning: warning: unnecessary parentheses around closure body --> virtio-devices/src/iommu.rs:578:41 | 578 |.retain(|&x, _| (x < req.virt_start || x > req.virt_end)); | ^ | = note: `#[warn(unused_parens)]` on by default help: remove these parentheses | 578 -.retain(|&x, _| (x < req.virt_start || x > req.virt_end)); 578 +.retain(|&x, _| x < req.virt_start || x > req.virt_end); | warning: `virtio-devices` (lib) generated 1 warning (run `cargo fix --lib -p virtio-devices` to apply 1 suggestion) Signed-off-by: Muminul Islam --- virtio-devices/src/iommu.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtio-devices/src/iommu.rs b/virtio-devices/src/iommu.rs index 6a0ccd6be3..cdf9e59087 100644 --- a/virtio-devices/src/iommu.rs +++ b/virtio-devices/src/iommu.rs @@ -575,7 +575,7 @@ impl Request { .get_mut(&domain_id) .unwrap() .mappings - .retain(|&x, _| (x < req.virt_start || x > req.virt_end)); + .retain(|&x, _| x < req.virt_start || x > req.virt_end); } VIRTIO_IOMMU_T_PROBE => { if desc_size_left != size_of::() { From a5cd1b4fbefe95042e3715dba54e24a88fd1d3ca Mon Sep 17 00:00:00 2001 From: Nuno Das Neves Date: Fri, 11 Jul 2025 14:00:22 -0700 Subject: [PATCH 019/294] build: Bump mshv-ioctls and mshv-bindings to v0.5.2 Also update the version in the fuzz crate. 
Signed-off-by: Nuno Das Neves --- Cargo.lock | 8 ++++---- Cargo.toml | 4 ++-- fuzz/Cargo.lock | 4 ++-- fuzz/Cargo.toml | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2324d4e720..2da292459b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1225,9 +1225,9 @@ checksum = "9bec4598fddb13cc7b528819e697852653252b760f1228b7642679bf2ff2cd07" [[package]] name = "mshv-bindings" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "909de5fd4a5a3347a6c62872f6816e6279efd8615a753f10a3bc4daaef8a72ef" +checksum = "07f94f542c738f19317363222a7f415588c04cda964882479af41948ac3c3647" dependencies = [ "libc", "num_enum", @@ -1239,9 +1239,9 @@ dependencies = [ [[package]] name = "mshv-ioctls" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c7d94972588d562bd349b916de6a43f2ee268e6e9c91cfb5b30549ed4ea2751" +checksum = "8a6df0848f14eb69505a28673f94acdd830cf248fb57022b21f24e242b702e66" dependencies = [ "libc", "mshv-bindings", diff --git a/Cargo.toml b/Cargo.toml index a913cb6002..f46fd602e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -107,8 +107,8 @@ acpi_tables = { git = "https://github.com/rust-vmm/acpi_tables", branch = "main" kvm-bindings = "0.12.0" kvm-ioctls = "0.22.0" linux-loader = "0.13.0" -mshv-bindings = "0.5.1" -mshv-ioctls = "0.5.1" +mshv-bindings = "0.5.2" +mshv-ioctls = "0.5.2" seccompiler = "0.5.0" vfio-bindings = { version = "0.5.0", default-features = false } vfio-ioctls = { version = "0.5.0", default-features = false } diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 4ea15fff34..bb325964f1 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -644,9 +644,9 @@ dependencies = [ [[package]] name = "mshv-bindings" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "909de5fd4a5a3347a6c62872f6816e6279efd8615a753f10a3bc4daaef8a72ef" +checksum = "07f94f542c738f19317363222a7f415588c04cda964882479af41948ac3c3647" dependencies = [ "libc", "num_enum", diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 27bfc87edb..5d6ac0a29e 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -24,7 +24,7 @@ libc = "0.2.155" libfuzzer-sys = "0.4.7" linux-loader = { version = "0.13.0", features = ["bzimage", "elf", "pe"] } micro_http = { git = "https://github.com/firecracker-microvm/micro-http", branch = "main" } -mshv-bindings = "0.5.0" +mshv-bindings = "0.5.2" net_util = { path = "../net_util" } seccompiler = "0.5.0" virtio-devices = { path = "../virtio-devices" } From 01aed9733cdc9fc248ce1acb2abca545a325490e Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Mon, 14 Jul 2025 17:17:07 +0200 Subject: [PATCH 020/294] build: add missing dependency features This makes it possible to run cargo test just for the virtio-devices crate (as long as either KVM or MSHV is specified). 
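As a usage note (exact flags depend on the host and setup), an invocation along the lines of `cargo test -p virtio-devices --features kvm` (or `--features mshv`) should now resolve all required dependency features and test the crate in isolation.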
Signed-off-by: Alyssa Ross --- pci/Cargo.toml | 4 ++-- virtio-devices/Cargo.toml | 2 ++ vmm/Cargo.toml | 9 ++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pci/Cargo.toml b/pci/Cargo.toml index 9273340879..0e29051815 100644 --- a/pci/Cargo.toml +++ b/pci/Cargo.toml @@ -6,8 +6,8 @@ version = "0.1.0" [features] default = [] -kvm = ["vfio-ioctls/kvm"] -mshv = ["vfio-ioctls/mshv"] +kvm = ["hypervisor/kvm", "vfio-ioctls/kvm"] +mshv = ["hypervisor/mshv", "vfio-ioctls/mshv"] [dependencies] anyhow = "1.0.94" diff --git a/virtio-devices/Cargo.toml b/virtio-devices/Cargo.toml index 5056fd1acf..73eaec803c 100644 --- a/virtio-devices/Cargo.toml +++ b/virtio-devices/Cargo.toml @@ -6,6 +6,8 @@ version = "0.1.0" [features] default = [] +kvm = ["pci/kvm"] +mshv = ["pci/mshv"] sev_snp = ["mshv-ioctls"] [dependencies] diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 813d74f351..968341eedb 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -16,9 +16,16 @@ kvm = [ "hypervisor/kvm", "pci/kvm", "vfio-ioctls/kvm", + "virtio-devices/kvm", "vm-device/kvm", ] -mshv = ["hypervisor/mshv", "pci/mshv", "vfio-ioctls/mshv", "vm-device/mshv"] +mshv = [ + "hypervisor/mshv", + "pci/mshv", + "vfio-ioctls/mshv", + "virtio-devices/mshv", + "vm-device/mshv", +] pvmemcontrol = ["devices/pvmemcontrol"] sev_snp = ["arch/sev_snp", "hypervisor/sev_snp", "virtio-devices/sev_snp"] tdx = ["arch/tdx", "hypervisor/tdx"] From ec8fceb4a6a537c4d838287d47edc2c156379b4e Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Mon, 14 Jul 2025 18:19:42 +0200 Subject: [PATCH 021/294] virtio-devices: stop corrupting vsock commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The read_exact() call was introduced in 82ac114b8 ("virtio-devices: vsock: handle short read in muxer") to solve a crash when a connection disconnected without sending any data, but it introduced a problem of its own: because the socket is non-blocking, read_exact() may read some data, then return ErrorKind::WouldBlock. In that case, the data it read will be discarded. So for example if it read "CONNECT ", and then nothing else was available to read yet, "CONNECT " would be discarded, and so the next time this function was called, when epoll triggered again for the socket, only the following data would end up in command.buf, causing an error due to just a port number being an invalid command. Contrary to that commit message, this code was actually designed to handle short reads just fine — in the case of a short read, it stores the data it has read in command, and returns Error::UnixRead(ErrorKind::WouldBlock), which is ignored by the caller, and the function gets called again when there is more data to read, building up command potentially over the course of several reads. The only thing it didn't handle correctly, as far as I can tell, was a 0-byte read, which happens when a client disconnects from the socket without writing anything. All that's needed to fix this is to avoid an invalid subtraction in that case, so this change reverts 82ac114b8, fixing the issue with partial commands being discarded, and instead handles the 0-byte read by using slice::get, and treating an empty command as an incomplete command, which of course it is. 
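To make the failure mode concrete, a minimal sketch (stand-alone, not the muxer code) of the accumulate-with-read pattern the fix restores: a short or zero-byte read leaves whatever has already arrived in the buffer, and the caller simply retries on the next readiness event, instead of silently dropping bytes the way read_exact() can on a non-blocking socket.

```rust
// Minimal sketch, not the muxer itself: accumulate command bytes across
// multiple readiness events on a non-blocking stream.
use std::io::{ErrorKind, Read, Result};

fn read_more(stream: &mut impl Read, buf: &mut [u8], len: &mut usize, want: usize) -> Result<()> {
    if *len < want {
        match stream.read(&mut buf[*len..want]) {
            // n == 0 covers a peer that connected and closed without sending
            // anything; the caller just sees a (still) incomplete command.
            Ok(n) => *len += n,
            // WouldBlock: keep what is already in buf[..*len] and wait for
            // the next epoll event -- nothing read so far is discarded.
            Err(e) if e.kind() == ErrorKind::WouldBlock => {}
            Err(e) => return Err(e),
        }
    }
    Ok(())
}
```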
Fixes: 82ac114b8 ("virtio-devices: vsock: handle short read in muxer") Signed-off-by: Alyssa Ross --- virtio-devices/src/vsock/unix/muxer.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/virtio-devices/src/vsock/unix/muxer.rs b/virtio-devices/src/vsock/unix/muxer.rs index 55e819d4b7..ebe0dc723b 100644 --- a/virtio-devices/src/vsock/unix/muxer.rs +++ b/virtio-devices/src/vsock/unix/muxer.rs @@ -493,15 +493,18 @@ impl VsockMuxer { const MIN_COMMAND_LEN: usize = 10; // Bring in the minimum number of bytes that we should be able to read. - stream - .read_exact(&mut command.buf[command.len..MIN_COMMAND_LEN]) - .map_err(Error::UnixRead)?; - command.len = MIN_COMMAND_LEN; + if command.len < MIN_COMMAND_LEN { + command.len += stream + .read(&mut command.buf[command.len..MIN_COMMAND_LEN]) + .map_err(Error::UnixRead)?; + } // Now, finish reading the destination port number, by bringing in one byte at a time, // until we reach an EOL terminator (or our buffer space runs out). Yeah, not // particularly proud of this approach, but it will have to do for now. - while command.buf[command.len - 1] != b'\n' && command.len < command.buf.len() { + while command.len.checked_sub(1).map(|n| command.buf[n]) != Some(b'\n') + && command.len < command.buf.len() + { command.len += stream .read(&mut command.buf[command.len..=command.len]) .map_err(Error::UnixRead)?; From e32fa593e5516aeab8412db2cabc5dc758d94fc4 Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Fri, 11 Jul 2025 11:43:30 +0800 Subject: [PATCH 022/294] build: clean up unused dependencies Signed-off-by: Songqian Li --- Cargo.lock | 13 ------------- arch/Cargo.toml | 1 - performance-metrics/Cargo.toml | 1 - test_infra/Cargo.toml | 1 - tpm/Cargo.toml | 1 - vhost_user_block/Cargo.toml | 1 - virtio-devices/Cargo.toml | 3 --- vm-device/Cargo.toml | 2 -- vm-virtio/Cargo.toml | 1 - vmm/Cargo.toml | 2 -- 10 files changed, 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2da292459b..7e239b3092 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -119,7 +119,6 @@ dependencies = [ "uuid", "vm-fdt", "vm-memory", - "vm-migration", "vmm-sys-util", ] @@ -1507,7 +1506,6 @@ dependencies = [ "serde_json", "test_infra", "thiserror 2.0.12", - "wait-timeout", ] [[package]] @@ -2065,7 +2063,6 @@ dependencies = [ "dirs", "epoll", "libc", - "serde", "serde_json", "ssh2", "thiserror 2.0.12", @@ -2141,7 +2138,6 @@ name = "tpm" version = "0.1.0" dependencies = [ "anyhow", - "byteorder", "libc", "log", "net_gen", @@ -2312,7 +2308,6 @@ dependencies = [ "block", "clap", "env_logger", - "epoll", "libc", "log", "option_parser", @@ -2355,7 +2350,6 @@ name = "virtio-devices" version = "0.1.0" dependencies = [ "anyhow", - "arc-swap", "block", "byteorder", "epoll", @@ -2363,13 +2357,11 @@ dependencies = [ "libc", "log", "mshv-ioctls", - "net_gen", "net_util", "pci", "rate_limiter", "seccompiler", "serde", - "serde_json", "serde_with", "serial_buffer", "thiserror 2.0.12", @@ -2409,12 +2401,10 @@ dependencies = [ name = "vm-device" version = "0.1.0" dependencies = [ - "anyhow", "hypervisor", "serde", "thiserror 2.0.12", "vfio-ioctls", - "vm-memory", "vmm-sys-util", ] @@ -2450,7 +2440,6 @@ dependencies = [ name = "vm-virtio" version = "0.1.0" dependencies = [ - "log", "virtio-queue", "vm-memory", ] @@ -2461,7 +2450,6 @@ version = "0.1.0" dependencies = [ "acpi_tables", "anyhow", - "arc-swap", "arch", "bitflags 2.9.0", "block", @@ -2503,7 +2491,6 @@ dependencies = [ "vfio_user", "virtio-bindings", "virtio-devices", - "virtio-queue", "vm-allocator", 
"vm-device", "vm-memory", diff --git a/arch/Cargo.toml b/arch/Cargo.toml index 4c068d131f..03d2ad4af5 100644 --- a/arch/Cargo.toml +++ b/arch/Cargo.toml @@ -21,7 +21,6 @@ serde = { version = "1.0.208", features = ["derive", "rc"] } thiserror = { workspace = true } uuid = { workspace = true } vm-memory = { workspace = true, features = ["backend-bitmap", "backend-mmap"] } -vm-migration = { path = "../vm-migration" } vmm-sys-util = { workspace = true, features = ["with-serde"] } [target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] diff --git a/performance-metrics/Cargo.toml b/performance-metrics/Cargo.toml index 87dce7862e..8572bf5160 100644 --- a/performance-metrics/Cargo.toml +++ b/performance-metrics/Cargo.toml @@ -12,4 +12,3 @@ serde = { version = "1.0.208", features = ["derive", "rc"] } serde_json = { workspace = true } test_infra = { path = "../test_infra" } thiserror = { workspace = true } -wait-timeout = "0.2.0" diff --git a/test_infra/Cargo.toml b/test_infra/Cargo.toml index 0374bb0fad..5aeae23c58 100644 --- a/test_infra/Cargo.toml +++ b/test_infra/Cargo.toml @@ -8,7 +8,6 @@ version = "0.1.0" dirs = "6.0.0" epoll = "4.3.3" libc = "0.2.167" -serde = { version = "1.0.208", features = ["derive", "rc"] } serde_json = { workspace = true } ssh2 = { version = "0.9.4", features = ["vendored-openssl"] } thiserror = { workspace = true } diff --git a/tpm/Cargo.toml b/tpm/Cargo.toml index 076be121ef..ab257ec189 100644 --- a/tpm/Cargo.toml +++ b/tpm/Cargo.toml @@ -7,7 +7,6 @@ version = "0.1.0" [dependencies] anyhow = "1.0.81" -byteorder = "1.5.0" libc = "0.2.153" log = "0.4.21" net_gen = { path = "../net_gen" } diff --git a/vhost_user_block/Cargo.toml b/vhost_user_block/Cargo.toml index d9aa7e105d..0648d13be5 100644 --- a/vhost_user_block/Cargo.toml +++ b/vhost_user_block/Cargo.toml @@ -9,7 +9,6 @@ version = "0.1.0" block = { path = "../block" } clap = { version = "4.5.13", features = ["cargo", "wrap_help"] } env_logger = { workspace = true } -epoll = "4.3.3" libc = "0.2.167" log = "0.4.22" option_parser = { path = "../option_parser" } diff --git a/virtio-devices/Cargo.toml b/virtio-devices/Cargo.toml index 73eaec803c..a4c70d111f 100644 --- a/virtio-devices/Cargo.toml +++ b/virtio-devices/Cargo.toml @@ -12,7 +12,6 @@ sev_snp = ["mshv-ioctls"] [dependencies] anyhow = "1.0.94" -arc-swap = "1.7.1" block = { path = "../block" } byteorder = "1.5.0" epoll = "4.3.3" @@ -20,13 +19,11 @@ event_monitor = { path = "../event_monitor" } libc = "0.2.167" log = "0.4.22" mshv-ioctls = { workspace = true, optional = true } -net_gen = { path = "../net_gen" } net_util = { path = "../net_util" } pci = { path = "../pci" } rate_limiter = { path = "../rate_limiter" } seccompiler = { workspace = true } serde = { version = "1.0.208", features = ["derive"] } -serde_json = { workspace = true } serde_with = { version = "3.9.0", default-features = false, features = [ "macros", ] } diff --git a/vm-device/Cargo.toml b/vm-device/Cargo.toml index cc24dc4764..8262f84b00 100644 --- a/vm-device/Cargo.toml +++ b/vm-device/Cargo.toml @@ -10,10 +10,8 @@ kvm = ["vfio-ioctls/kvm"] mshv = ["vfio-ioctls/mshv"] [dependencies] -anyhow = "1.0.94" hypervisor = { path = "../hypervisor" } serde = { version = "1.0.208", features = ["derive", "rc"] } thiserror = { workspace = true } vfio-ioctls = { workspace = true, default-features = false } -vm-memory = { workspace = true, features = ["backend-mmap"] } vmm-sys-util = { workspace = true } diff --git a/vm-virtio/Cargo.toml b/vm-virtio/Cargo.toml index 
7a5492430e..b22a2f5551 100644 --- a/vm-virtio/Cargo.toml +++ b/vm-virtio/Cargo.toml @@ -8,7 +8,6 @@ version = "0.1.0" default = [] [dependencies] -log = "0.4.22" virtio-queue = { workspace = true } vm-memory = { workspace = true, features = [ "backend-atomic", diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 968341eedb..b28946f320 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -34,7 +34,6 @@ tracing = ["tracer/tracing"] [dependencies] acpi_tables = { workspace = true } anyhow = "1.0.94" -arc-swap = "1.7.1" arch = { path = "../arch" } bitflags = "2.9.0" block = { path = "../block" } @@ -79,7 +78,6 @@ vfio-ioctls = { workspace = true, default-features = false } vfio_user = { workspace = true } virtio-bindings = { workspace = true } virtio-devices = { path = "../virtio-devices" } -virtio-queue = { workspace = true } vm-allocator = { path = "../vm-allocator" } vm-device = { path = "../vm-device" } vm-memory = { workspace = true, features = [ From cea708deb92c5c277f42df75a05840191cfeb67c Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 16 Jul 2025 17:17:09 +0000 Subject: [PATCH 023/294] performance-metrics: Fix the names of the kernels In 2b0575371693, the names of the reference kernels are changed. Signed-off-by: Wei Liu --- performance-metrics/src/performance_tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/performance-metrics/src/performance_tests.rs b/performance-metrics/src/performance_tests.rs index 7bbecf8898..a2d7004900 100644 --- a/performance-metrics/src/performance_tests.rs +++ b/performance-metrics/src/performance_tests.rs @@ -68,9 +68,9 @@ fn direct_kernel_boot_path() -> PathBuf { let mut kernel_path = workload_path; #[cfg(target_arch = "x86_64")] - kernel_path.push("vmlinux"); + kernel_path.push("vmlinux-x86_64"); #[cfg(target_arch = "aarch64")] - kernel_path.push("Image"); + kernel_path.push("Image-arm64"); kernel_path } From 987ad11c90d9dcc6189d61f7e2517d84eb70668a Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Wed, 16 Jul 2025 22:27:05 +0000 Subject: [PATCH 024/294] main: Report errors with 'error!()' This was missed from #7183, likely because `eprint!` is used instead of `eprintln!`. Signed-off-by: Bo Chen --- src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 355f0a9cfe..b8065083de 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,12 +19,12 @@ pub fn cli_print_error_chain<'a>( &'a (dyn Error + 'static), ) -> Option, ) { - eprint!("Error: {component} exited with the following "); + let msg = format!("Error: {component} exited with the following"); if top_error.source().is_none() { - error!("error:"); + error!("{msg} error:"); error!(" {top_error}"); } else { - error!("chain of errors:"); + error!("{msg} chain of errors:"); std::iter::successors(Some(top_error), |sub_error| { // Dereference necessary to mitigate rustc compiler bug. // See From 4528e2f1eaaabb60aa027b5619072676b21e571d Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 15 Jul 2025 17:36:57 +0100 Subject: [PATCH 025/294] devices: rtc_pl031: Disable broken interrupt The PL031 RTC provides two features: a real-time counter and an alarm interrupt. To use the alarm, the driver normally writes a time value into the match register RTCMR, and when the counter reaches that value the device triggers the interrupt. At the moment the implementation ignores programming of the alarm, as the feature seems rarely used in VMs. 
However the interrupt is still triggered arbitrarily when the guest writes to registers, and the line is never cleared. This really confuses the Linux driver, which loops in the interrupt handler until Linux realizes that no one is dealing with the interrupt (200000 unanswered calls) and disables the handler. One way to fix this would be implementing the alarm function properly, which isn't too difficult but requires adding some async timer logic which probably won't ever get used. In addition the device's interrupt is level-triggered and we don't support level interrupts at the moment, though we could probably get away with changing this interrupt to edge. The simplest fix, though, is to just disable the interrupt logic entirely, so that the alarm function still doesn't work but the guest doesn't see spurious interrupts. Add a default() implementation to satisfy clippy's new_without_default check, since Rtc::new() doesn't take a parameter after this change. Signed-off-by: Jean-Philippe Brucker --- devices/src/legacy/rtc_pl031.rs | 102 +++++++------------------------- vmm/src/device_manager.rs | 8 +-- 2 files changed, 24 insertions(+), 86 deletions(-) diff --git a/devices/src/legacy/rtc_pl031.rs b/devices/src/legacy/rtc_pl031.rs index 39c7911eed..98bca77af2 100644 --- a/devices/src/legacy/rtc_pl031.rs +++ b/devices/src/legacy/rtc_pl031.rs @@ -4,16 +4,18 @@ //! ARM PL031 Real Time Clock //! -//! This module implements a PL031 Real Time Clock (RTC) that provides to provides long time base counter. -//! This is achieved by generating an interrupt signal after counting for a programmed number of cycles of -//! a real-time clock input. +//! This module implements part of a PL031 Real Time Clock (RTC): +//! * provide a clock value via RTCDR +//! * no alarm is implemented through the match register +//! * no interrupt is generated +//! * RTC cannot be disabled via RTCCR +//! * no test registers //! +use std::result; use std::sync::{Arc, Barrier}; use std::time::Instant; -use std::{io, result}; use thiserror::Error; -use vm_device::interrupt::InterruptSourceGroup; use vm_device::BusDevice; use crate::{read_le_u32, write_le_u32}; @@ -45,8 +47,6 @@ pub const NANOS_PER_SECOND: u64 = 1_000_000_000; pub enum Error { #[error("Bad Write Offset: {0}")] BadWriteOffset(u64), - #[error("Failed to trigger interrupt")] - InterruptFailure(#[source] io::Error), } type Result = result::Result; @@ -107,31 +107,20 @@ pub struct Rtc { match_value: u32, // Writes to this register load an update value into the RTC. load: u32, - imsc: u32, - ris: u32, - interrupt: Arc, } impl Rtc { /// Constructs an AMBA PL031 RTC device. - pub fn new(interrupt: Arc) -> Self { + pub fn new() -> Self { Self { // This is used only for duration measuring purposes. previous_now: Instant::now(), tick_offset: get_time(ClockType::Real) as i64, match_value: 0, load: 0, - imsc: 0, - ris: 0, - interrupt, } } - fn trigger_interrupt(&mut self) -> Result<()> { - self.interrupt.trigger(0).map_err(Error::InterruptFailure)?; - Ok(()) - } - fn get_time(&self) -> u32 { let ts = (self.tick_offset as i128) + (Instant::now().duration_since(self.previous_now).as_nanos() as i128); @@ -155,16 +144,8 @@ impl Rtc { // we want to terminate the execution of the process. self.tick_offset = seconds_to_nanoseconds(i64::from(val)).unwrap(); } - RTCIMSC => { - self.imsc = val & 1; - self.trigger_interrupt()?; - } - RTCICR => { - // As per above mentioned doc, the interrupt is cleared by writing any data value to - // the Interrupt Clear Register. 
- self.ris = 0; - self.trigger_interrupt()?; - } + RTCIMSC => (), + RTCICR => (), RTCCR => (), // ignore attempts to turn off the timer. o => { return Err(Error::BadWriteOffset(o)); @@ -174,6 +155,12 @@ impl Rtc { } } +impl Default for Rtc { + fn default() -> Self { + Self::new() + } +} + impl BusDevice for Rtc { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { let mut read_ok = true; @@ -189,10 +176,10 @@ impl BusDevice for Rtc { self.match_value } RTCLR => self.load, - RTCCR => 1, // RTC is always enabled. - RTCIMSC => self.imsc, - RTCRIS => self.ris, - RTCMIS => self.ris & self.imsc, + RTCCR => 1, // RTC is always enabled. + RTCIMSC => 0, // Interrupt is always disabled. + RTCRIS => 0, + RTCMIS => 0, _ => { read_ok = false; 0 @@ -230,9 +217,6 @@ impl BusDevice for Rtc { #[cfg(test)] mod tests { - use vm_device::interrupt::{InterruptIndex, InterruptSourceConfig}; - use vmm_sys_util::eventfd::EventFd; - use super::*; use crate::{ read_be_u16, read_be_u32, read_le_i32, read_le_u16, read_le_u64, write_be_u16, @@ -366,45 +350,9 @@ mod tests { assert!(seconds_to_nanoseconds(9_223_372_037).is_none()); } - struct TestInterrupt { - event_fd: EventFd, - } - - impl InterruptSourceGroup for TestInterrupt { - fn trigger(&self, _index: InterruptIndex) -> result::Result<(), std::io::Error> { - self.event_fd.write(1) - } - - fn update( - &self, - _index: InterruptIndex, - _config: InterruptSourceConfig, - _masked: bool, - _set_gsi: bool, - ) -> result::Result<(), std::io::Error> { - Ok(()) - } - - fn set_gsi(&self) -> result::Result<(), std::io::Error> { - Ok(()) - } - - fn notifier(&self, _index: InterruptIndex) -> Option { - Some(self.event_fd.try_clone().unwrap()) - } - } - - impl TestInterrupt { - fn new(event_fd: EventFd) -> Self { - TestInterrupt { event_fd } - } - } - #[test] fn test_rtc_read_write_and_event() { - let intr_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - - let mut rtc = Rtc::new(Arc::new(TestInterrupt::new(intr_evt.try_clone().unwrap()))); + let mut rtc = Rtc::new(); let mut data = [0; 4]; // Read and write to the MR register. @@ -427,15 +375,13 @@ mod tests { assert_eq!((v / NANOS_PER_SECOND) as u32, v_read); // Read and write to IMSC register. - // Test with non zero value. + // Test with non zero value. Our device ignores the write. let non_zero = 1; write_le_u32(&mut data, non_zero); rtc.write(LEGACY_RTC_MAPPED_IO_START, RTCIMSC, &data); - // The interrupt line should be on. - assert!(rtc.interrupt.notifier(0).unwrap().read().unwrap() == 1); rtc.read(LEGACY_RTC_MAPPED_IO_START, RTCIMSC, &mut data); let v = read_le_u32(&data); - assert_eq!(non_zero & 1, v); + assert_eq!(0, v); // Now test with 0. write_le_u32(&mut data, 0); @@ -447,8 +393,6 @@ mod tests { // Read and write to the ICR register. write_le_u32(&mut data, 1); rtc.write(LEGACY_RTC_MAPPED_IO_START, RTCICR, &data); - // The interrupt line should be on. 
- assert!(rtc.interrupt.notifier(0).unwrap().read().unwrap() > 1); let v_before = read_le_u32(&data); rtc.read(LEGACY_RTC_MAPPED_IO_START, RTCICR, &mut data); diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index fccdd75bfa..2df6f1d585 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -1931,13 +1931,7 @@ impl DeviceManager { .allocate_irq() .unwrap(); - let interrupt_group = interrupt_manager - .create_group(LegacyIrqGroupConfig { - irq: rtc_irq as InterruptIndex, - }) - .map_err(DeviceManagerError::CreateInterruptGroup)?; - - let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group))); + let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new())); self.bus_devices .push(Arc::clone(&rtc_device) as Arc); From 4ea40b4beac71180588a37de5f4674ebcca9f31e Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 17 Jul 2025 16:59:06 +0000 Subject: [PATCH 026/294] rate_limiter: Use Mutex::get_mut() in update_buckets There is no need to lock. That function already holds a mutable reference to self. Signed-off-by: Wei Liu --- rate_limiter/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rate_limiter/src/lib.rs b/rate_limiter/src/lib.rs index 4202bfd1a1..9d67993386 100644 --- a/rate_limiter/src/lib.rs +++ b/rate_limiter/src/lib.rs @@ -486,7 +486,7 @@ impl RateLimiter { /// Updates the parameters of the token buckets associated with this RateLimiter. // TODO: Please note that, right now, the buckets become full after being updated. pub fn update_buckets(&mut self, bytes: BucketUpdate, ops: BucketUpdate) { - let mut guard = self.inner.lock().unwrap(); + let guard = self.inner.get_mut().unwrap(); match bytes { BucketUpdate::Disabled => guard.bandwidth = None, BucketUpdate::Update(tb) => guard.bandwidth = Some(tb), From 5716af09a5abaefdce4b7e973cd9faa37099a7b9 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 17 Jul 2025 17:03:01 +0000 Subject: [PATCH 027/294] vhost_user_block: Use Mutex::get_mut() where possible Signed-off-by: Wei Liu --- vhost_user_block/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vhost_user_block/src/lib.rs b/vhost_user_block/src/lib.rs index 040eb18353..5e668c0c5a 100644 --- a/vhost_user_block/src/lib.rs +++ b/vhost_user_block/src/lib.rs @@ -334,8 +334,8 @@ impl VhostUserBackendMut for VhostUserBlkBackend { } fn set_event_idx(&mut self, enabled: bool) { - for thread in self.threads.iter() { - thread.lock().unwrap().event_idx = enabled; + for thread in self.threads.iter_mut() { + thread.get_mut().unwrap().event_idx = enabled; } } @@ -352,7 +352,7 @@ impl VhostUserBackendMut for VhostUserBlkBackend { debug!("event received: {:?}", device_event); - let mut thread = self.threads[thread_id].lock().unwrap(); + let thread = self.threads[thread_id].get_mut().unwrap(); match device_event { 0 => { let mut vring = vrings[0].get_mut(); From 4be2ca4c10ba388a3798c08d100bf172fe6e4e6d Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 17 Jul 2025 17:06:21 +0000 Subject: [PATCH 028/294] vhost_user_net: Use Mutex::get_mut() where possible Signed-off-by: Wei Liu --- vhost_user_net/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vhost_user_net/src/lib.rs b/vhost_user_net/src/lib.rs index 56b54ad6d8..de2a416aa0 100644 --- a/vhost_user_net/src/lib.rs +++ b/vhost_user_net/src/lib.rs @@ -205,7 +205,7 @@ impl VhostUserBackendMut for VhostUserNetBackend { vrings: &[VringRwLock>], thread_id: usize, ) -> VhostUserBackendResult<()> { - let mut thread = 
self.threads[thread_id].lock().unwrap(); + let thread = self.threads[thread_id].get_mut().unwrap(); match device_event { 0 => { if !thread.net.rx_tap_listening { From 62001b65e9d165e489b60d1e5a17e3db22142f30 Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Wed, 16 Jul 2025 23:10:21 +0000 Subject: [PATCH 029/294] build: Release v47.0 Signed-off-by: Bo Chen --- Cargo.lock | 2 +- Cargo.toml | 2 +- release-notes.md | 237 ++++++++++++++++++++++++++++++----------------- 3 files changed, 154 insertions(+), 87 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7e239b3092..305e31cee0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -401,7 +401,7 @@ checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "cloud-hypervisor" -version = "46.0.0" +version = "47.0.0" dependencies = [ "anyhow", "api_client", diff --git a/Cargo.toml b/Cargo.toml index f46fd602e0..f05de398ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" homepage = "https://github.com/cloud-hypervisor/cloud-hypervisor" license = "Apache-2.0 AND BSD-3-Clause" name = "cloud-hypervisor" -version = "46.0.0" +version = "47.0.0" # Minimum buildable version: # Keep in sync with version in .github/workflows/build.yaml # Policy on MSRV (see #4318): diff --git a/release-notes.md b/release-notes.md index 559066feb4..0eb024c966 100644 --- a/release-notes.md +++ b/release-notes.md @@ -1,50 +1,57 @@ +- [v47.0](#v470) + - [Block Device Error Reporting to the Guest](#block-device-error-reporting-to-the-guest) + - [Nice Error Messages on Exit](#nice-error-messages-on-exit) + - [Alphabetically Sorted CLI Options for ch-remote](#alphabetically-sorted-cli-options-for-ch-remote) + - [Notable Bug Fixes](#notable-bug-fixes) + - [Deprecations](#deprecations) + - [Contributors](#contributors) - [v46.0](#v460) - [File-level Locking Support with `--disk`](#file-level-locking-support-with---disk) - [Improved Error Reporting with VM Resizing](#improved-error-reporting-with-vm-resizing) - [IPv6 Address Support with `--net`](#ipv6-address-support-with---net) - [Experimental AArch64 Support with the MSHV Hypervisor](#experimental-aarch64-support-with-the-mshv-hypervisor) - [Deprecated SGX Support](#deprecated-sgx-support) - - [Notable Bug Fixes](#notable-bug-fixes) - - [Contributors](#contributors) + - [Notable Bug Fixes](#notable-bug-fixes-1) + - [Contributors](#contributors-1) - [v45.0](#v450) - [Experimental `riscv64` Architecture Support](#experimental-riscv64-architecture-support) - [Alphabetically Sorted CLI Options](#alphabetically-sorted-cli-options) - [Improved Downtime of VM Live Migration](#improved-downtime-of-vm-live-migration) - - [Notable Bug Fixes](#notable-bug-fixes-1) - - [Contributors](#contributors-1) + - [Notable Bug Fixes](#notable-bug-fixes-2) + - [Contributors](#contributors-2) - [v44.0](#v440) - [Configurable `virtio-iommu` Address Width](#configurable-virtio-iommu-address-width) - [Notable Performance Improvements](#notable-performance-improvements) - [New Fuzzers](#new-fuzzers) - - [Notable Bug Fixes](#notable-bug-fixes-2) - - [Contributors](#contributors-2) + - [Notable Bug Fixes](#notable-bug-fixes-3) + - [Contributors](#contributors-3) - [v43.0](#v430) - [Live Migration over TCP Connections](#live-migration-over-tcp-connections) - [Notable Performance Improvements](#notable-performance-improvements-1) - - [Notable Bug Fixes](#notable-bug-fixes-3) - - [Contributors](#contributors-3) + - [Notable Bug Fixes](#notable-bug-fixes-4) + - [Contributors](#contributors-4) - [v42.0](#v420) 
- [SVE/SVE2 Support on AArch64](#svesve2-support-on-aarch64) - - [Notable Bug Fixes](#notable-bug-fixes-4) + - [Notable Bug Fixes](#notable-bug-fixes-5) - [Sponsorships](#sponsorships) - - [Contributors](#contributors-4) + - [Contributors](#contributors-5) - [v41.0](#v410) - [Experimental "Pvmemcontrol" Support](#experimental-pvmemcontrol-support) - [Sandboxing With Landlock Support](#sandboxing-with-landlock-support) - [Notable Performance Improvements](#notable-performance-improvements-2) - - [Notable Bug Fixes](#notable-bug-fixes-5) - - [Contributors](#contributors-5) -- [v40.0](#v400) - - [Support for Restoring File Descriptor Backed Network Devices](#support-for-restoring-file-descriptor-backed-network-devices) - [Notable Bug Fixes](#notable-bug-fixes-6) - [Contributors](#contributors-6) +- [v40.0](#v400) + - [Support for Restoring File Descriptor Backed Network Devices](#support-for-restoring-file-descriptor-backed-network-devices) + - [Notable Bug Fixes](#notable-bug-fixes-7) + - [Contributors](#contributors-7) - [v39.0](#v390) - [Variable Sizing of PCI Apertures for Segments](#variable-sizing-of-pci-apertures-for-segments) - [Direct Booting with bzImages](#direct-booting-with-bzimages) - [Support for NVIDIA GPUDirect P2P Support](#support-for-nvidia-gpudirect-p2p-support) - [Guest NMI Injection Support](#guest-nmi-injection-support) - - [Notable Bug Fixes](#notable-bug-fixes-7) - - [Contributors](#contributors-7) + - [Notable Bug Fixes](#notable-bug-fixes-8) + - [Contributors](#contributors-8) - [v38.0](#v380) - [Group Rate Limiter on Block Devices](#group-rate-limiter-on-block-devices) - [CPU Pinning Support for Block Device Worker Thread](#cpu-pinning-support-for-block-device-worker-thread) @@ -52,16 +59,16 @@ - [New 'debug-console' Device](#new-debug-console-device) - [Improved VFIO Device Support](#improved-vfio-device-support) - [Extended CPU Affinity Support](#extended-cpu-affinity-support) - - [Notable Bug Fixes](#notable-bug-fixes-8) - - [Contributors](#contributors-8) + - [Notable Bug Fixes](#notable-bug-fixes-9) + - [Contributors](#contributors-9) - [v37.0](#v370) - [Long Term Support (LTS) Release](#long-term-support-lts-release) - [Multiple PCI segments Support for 32-bit VFIO devices](#multiple-pci-segments-support-for-32-bit-vfio-devices) - [Configurable Named TAP Devices](#configurable-named-tap-devices) - [TTY Output from Both Serial Device and Virtio Console](#tty-output-from-both-serial-device-and-virtio-console) - [Faster VM Restoration from Snapshots](#faster-vm-restoration-from-snapshots) - - [Notable Bug Fixes](#notable-bug-fixes-9) - - [Contributors](#contributors-9) + - [Notable Bug Fixes](#notable-bug-fixes-10) + - [Contributors](#contributors-10) - [v36.0](#v360) - [Command Line Changes](#command-line-changes) - [Enabled Features Reported via API Endpoint and CLI](#enabled-features-reported-via-api-endpoint-and-cli) @@ -70,31 +77,31 @@ - [Unix Socket Backend for Serial Port](#unix-socket-backend-for-serial-port) - [AIO Backend for Block Devices](#aio-backend-for-block-devices) - [Documentation Improvements](#documentation-improvements) - - [Notable Bug Fixes](#notable-bug-fixes-10) - - [Contributors](#contributors-10) + - [Notable Bug Fixes](#notable-bug-fixes-11) + - [Contributors](#contributors-11) - [v35.0](#v350) - [`virtio-vsock` Support for Linux Guest Kernel v6.3+](#virtio-vsock-support-for-linux-guest-kernel-v63) - [User Specified Serial Number for `virtio-block`](#user-specified-serial-number-for-virtio-block) - [vCPU TSC Frequency Included in 
Migration State](#vcpu-tsc-frequency-included-in-migration-state) - - [Notable Bug Fixes](#notable-bug-fixes-11) - - [Contributors](#contributors-11) + - [Notable Bug Fixes](#notable-bug-fixes-12) + - [Contributors](#contributors-12) - [v34.0](#v340) - [Paravirtualised Panic Device Support](#paravirtualised-panic-device-support) - [Improvements to VM Core Dump](#improvements-to-vm-core-dump) - [QCOW2 Support for Backing Files](#qcow2-support-for-backing-files) - [Minimum Host Kernel Bump](#minimum-host-kernel-bump) - - [Notable Bug Fixes](#notable-bug-fixes-12) - - [Contributors](#contributors-12) + - [Notable Bug Fixes](#notable-bug-fixes-13) + - [Contributors](#contributors-13) - [v33.0](#v330) - [D-Bus based API](#d-bus-based-api) - [Expose Host CPU Cache Details for AArch64](#expose-host-cpu-cache-details-for-aarch64) - - [Notable Bug Fixes](#notable-bug-fixes-13) - - [Contributors](#contributors-13) + - [Notable Bug Fixes](#notable-bug-fixes-14) + - [Contributors](#contributors-14) - [v32.0](#v320) - [Increased PCI Segment Limit](#increased-pci-segment-limit) - [API Changes](#api-changes) - - [Notable Bug Fixes](#notable-bug-fixes-14) - - [Contributors](#contributors-14) + - [Notable Bug Fixes](#notable-bug-fixes-15) + - [Contributors](#contributors-15) - [v31.1](#v311) - [v31.0](#v310) - [Update to Latest `acpi_tables`](#update-to-latest-acpi_tables) @@ -102,15 +109,15 @@ - [Improvements on Console `SIGWINCH` Handler](#improvements-on-console-sigwinch-handler) - [Remove Directory Support from `MemoryZoneConfig::file`](#remove-directory-support-from-memoryzoneconfigfile) - [Documentation Improvements](#documentation-improvements-1) - - [Notable Bug Fixes](#notable-bug-fixes-15) - - [Contributors](#contributors-15) + - [Notable Bug Fixes](#notable-bug-fixes-16) + - [Contributors](#contributors-16) - [v30.0](#v300) - [Command Line Changes for Reduced Binary Size](#command-line-changes-for-reduced-binary-size) - [Basic vfio-user Server Support](#basic-vfio-user-server-support) - [Heap Profiling Support](#heap-profiling-support) - [Documentation Improvements](#documentation-improvements-2) - - [Notable Bug Fixes](#notable-bug-fixes-16) - - [Contributors](#contributors-16) + - [Notable Bug Fixes](#notable-bug-fixes-17) + - [Contributors](#contributors-17) - [v28.2](#v282) - [v29.0](#v290) - [Release Binary Supports Both MSHV and KVM](#release-binary-supports-both-mshv-and-kvm) @@ -120,10 +127,10 @@ - [`AArch64` Documentation Integration](#aarch64-documentation-integration) - [`virtio-block` Counters Enhancement](#virtio-block-counters-enhancement) - [TCP Offload Control](#tcp-offload-control) - - [Notable Bug Fixes](#notable-bug-fixes-17) + - [Notable Bug Fixes](#notable-bug-fixes-18) - [Removals](#removals) - - [Deprecations](#deprecations) - - [Contributors](#contributors-17) + - [Deprecations](#deprecations-1) + - [Contributors](#contributors-18) - [v28.1](#v281) - [v28.0](#v280) - [Community Engagement (Reminder)](#community-engagement-reminder) @@ -131,9 +138,9 @@ - [Virtualised TPM Support](#virtualised-tpm-support) - [Transparent Huge Page Support](#transparent-huge-page-support) - [README Quick Start Improved](#readme-quick-start-improved) - - [Notable Bug Fixes](#notable-bug-fixes-18) + - [Notable Bug Fixes](#notable-bug-fixes-19) - [Removals](#removals-1) - - [Contributors](#contributors-18) + - [Contributors](#contributors-19) - [v27.0](#v270) - [Community Engagement](#community-engagement) - [Prebuilt Packages](#prebuilt-packages) @@ -142,41 +149,41 @@ - [Simplified Build 
Feature Flags](#simplified-build-feature-flags) - [Asynchronous Kernel Loading](#asynchronous-kernel-loading) - [GDB Support for AArch64](#gdb-support-for-aarch64) - - [Notable Bug Fixes](#notable-bug-fixes-19) - - [Deprecations](#deprecations-1) - - [Contributors](#contributors-19) + - [Notable Bug Fixes](#notable-bug-fixes-20) + - [Deprecations](#deprecations-2) + - [Contributors](#contributors-20) - [v26.0](#v260) - [SMBIOS Improvements via `--platform`](#smbios-improvements-via---platform) - [Unified Binary MSHV and KVM Support](#unified-binary-mshv-and-kvm-support) - - [Notable Bug Fixes](#notable-bug-fixes-20) - - [Deprecations](#deprecations-2) + - [Notable Bug Fixes](#notable-bug-fixes-21) + - [Deprecations](#deprecations-3) - [Removals](#removals-2) - - [Contributors](#contributors-20) + - [Contributors](#contributors-21) - [v25.0](#v250) - [`ch-remote` Improvements](#ch-remote-improvements-1) - [VM "Coredump" Support](#vm-coredump-support) - - [Notable Bug Fixes](#notable-bug-fixes-21) + - [Notable Bug Fixes](#notable-bug-fixes-22) - [Removals](#removals-3) - - [Contributors](#contributors-21) + - [Contributors](#contributors-22) - [v24.0](#v240) - [Bypass Mode for `virtio-iommu`](#bypass-mode-for-virtio-iommu) - [Ensure Identifiers Uniqueness](#ensure-identifiers-uniqueness) - [Sparse Mmap support](#sparse-mmap-support) - [Expose Platform Serial Number](#expose-platform-serial-number) - - [Notable Bug Fixes](#notable-bug-fixes-22) + - [Notable Bug Fixes](#notable-bug-fixes-23) - [Notable Improvements](#notable-improvements) - - [Deprecations](#deprecations-3) + - [Deprecations](#deprecations-4) - [New on the Website](#new-on-the-website) - - [Contributors](#contributors-22) + - [Contributors](#contributors-23) - [v23.1](#v231) - [v23.0](#v230) - [vDPA Support](#vdpa-support) - [Updated OS Support list](#updated-os-support-list) - [`AArch64` Memory Map Improvements](#aarch64-memory-map-improvements) - [`AMX` Support](#amx-support) - - [Notable Bug Fixes](#notable-bug-fixes-23) - - [Deprecations](#deprecations-4) - - [Contributors](#contributors-23) + - [Notable Bug Fixes](#notable-bug-fixes-24) + - [Deprecations](#deprecations-5) + - [Contributors](#contributors-24) - [v22.1](#v221) - [v22.0](#v220) - [GDB Debug Stub Support](#gdb-debug-stub-support) @@ -187,13 +194,13 @@ - [PMU Support for AArch64](#pmu-support-for-aarch64) - [Documentation Under CC-BY-4.0 License](#documentation-under-cc-by-40-license) - [Deprecation of "Classic" `virtiofsd`](#deprecation-of-classic-virtiofsd) - - [Notable Bug Fixes](#notable-bug-fixes-24) - - [Contributors](#contributors-24) + - [Notable Bug Fixes](#notable-bug-fixes-25) + - [Contributors](#contributors-25) - [v21.0](#v210) - [Efficient Local Live Migration (for Live Upgrade)](#efficient-local-live-migration-for-live-upgrade) - [Recommended Kernel is Now 5.15](#recommended-kernel-is-now-515) - - [Notable Bug fixes](#notable-bug-fixes-25) - - [Contributors](#contributors-25) + - [Notable Bug fixes](#notable-bug-fixes-26) + - [Contributors](#contributors-26) - [v20.2](#v202) - [v20.1](#v201) - [v20.0](#v200) @@ -202,8 +209,8 @@ - [Improved VFIO support](#improved-vfio-support) - [Safer code](#safer-code) - [Extended documentation](#extended-documentation) - - [Notable bug fixes](#notable-bug-fixes-26) - - [Contributors](#contributors-26) + - [Notable bug fixes](#notable-bug-fixes-27) + - [Contributors](#contributors-27) - [v19.0](#v190) - [Improved PTY handling for serial and 
`virtio-console`](#improved-pty-handling-for-serial-and-virtio-console) - [PCI boot time optimisations](#pci-boot-time-optimisations) @@ -211,8 +218,8 @@ - [Live migration enhancements](#live-migration-enhancements) - [`virtio-mem` support with `vfio-user`](#virtio-mem-support-with-vfio-user) - [AArch64 for `virtio-iommu`](#aarch64-for-virtio-iommu) - - [Notable bug fixes](#notable-bug-fixes-27) - - [Contributors](#contributors-27) + - [Notable bug fixes](#notable-bug-fixes-28) + - [Contributors](#contributors-28) - [v18.0](#v180) - [Experimental User Device (`vfio-user`) support](#experimental-user-device-vfio-user-support) - [Migration support for `vhost-user` devices](#migration-support-for-vhost-user-devices) @@ -222,31 +229,31 @@ - [Live migration on MSHV hypervisor](#live-migration-on-mshv-hypervisor) - [AArch64 CPU topology support](#aarch64-cpu-topology-support) - [Power button support on AArch64](#power-button-support-on-aarch64) - - [Notable bug fixes](#notable-bug-fixes-28) - - [Contributors](#contributors-28) + - [Notable bug fixes](#notable-bug-fixes-29) + - [Contributors](#contributors-29) - [v17.0](#v170) - [ARM64 NUMA support using ACPI](#arm64-numa-support-using-acpi) - [`Seccomp` support for MSHV backend](#seccomp-support-for-mshv-backend) - [Hotplug of `macvtap` devices](#hotplug-of-macvtap-devices) - [Improved SGX support](#improved-sgx-support) - [Inflight tracking for `vhost-user` devices](#inflight-tracking-for-vhost-user-devices) - - [Notable bug fixes](#notable-bug-fixes-29) - - [Contributors](#contributors-29) + - [Notable bug fixes](#notable-bug-fixes-30) + - [Contributors](#contributors-30) - [v16.0](#v160) - [Improved live migration support](#improved-live-migration-support) - [Improved `vhost-user` support](#improved-vhost-user-support) - [ARM64 ACPI and UEFI support](#arm64-acpi-and-uefi-support) - - [Notable bug fixes](#notable-bug-fixes-30) + - [Notable bug fixes](#notable-bug-fixes-31) - [Removed functionality](#removed-functionality) - - [Contributors](#contributors-30) + - [Contributors](#contributors-31) - [v15.0](#v150) - [Version numbering and stability guarantees](#version-numbering-and-stability-guarantees) - [Network device rate limiting](#network-device-rate-limiting) - [Support for runtime control of `virtio-net` guest offload](#support-for-runtime-control-of-virtio-net-guest-offload) - [`--api-socket` supports file descriptor parameter](#--api-socket-supports-file-descriptor-parameter) - [Bug fixes](#bug-fixes) - - [Deprecations](#deprecations-5) - - [Contributors](#contributors-31) + - [Deprecations](#deprecations-6) + - [Contributors](#contributors-32) - [v0.14.1](#v0141) - [v0.14.0](#v0140) - [Structured event monitoring](#structured-event-monitoring) @@ -255,8 +262,8 @@ - [Updated hotplug documentation](#updated-hotplug-documentation) - [PTY control for serial and `virtio-console`](#pty-control-for-serial-and-virtio-console) - [Block device rate limiting](#block-device-rate-limiting) - - [Deprecations](#deprecations-6) - - [Contributors](#contributors-32) + - [Deprecations](#deprecations-7) + - [Contributors](#contributors-33) - [v0.13.0](#v0130) - [Wider VFIO device support](#wider-vfio-device-support) - [Improved huge page support](#improved-huge-page-support) @@ -264,13 +271,13 @@ - [VHD disk image support](#vhd-disk-image-support) - [Improved Virtio device threading](#improved-virtio-device-threading) - [Clean shutdown support via synthetic power button](#clean-shutdown-support-via-synthetic-power-button) - - 
[Contributors](#contributors-33) + - [Contributors](#contributors-34) - [v0.12.0](#v0120) - [ARM64 enhancements](#arm64-enhancements) - [Removal of `vhost-user-net` and `vhost-user-block` self spawning](#removal-of-vhost-user-net-and-vhost-user-block-self-spawning) - [Migration of `vhost-user-fs` backend](#migration-of-vhost-user-fs-backend) - [Enhanced "info" API](#enhanced-info-api) - - [Contributors](#contributors-34) + - [Contributors](#contributors-35) - [v0.11.0](#v0110) - [`io_uring` support by default for `virtio-block`](#io_uring-support-by-default-for-virtio-block) - [Windows Guest Support](#windows-guest-support) @@ -282,15 +289,15 @@ - [Default Log Level Changed](#default-log-level-changed) - [New `--balloon` Parameter Added](#new---balloon-parameter-added) - [Experimental `virtio-watchdog` Support](#experimental-virtio-watchdog-support) - - [Notable Bug Fixes](#notable-bug-fixes-31) - - [Contributors](#contributors-35) + - [Notable Bug Fixes](#notable-bug-fixes-32) + - [Contributors](#contributors-36) - [v0.10.0](#v0100) - [`virtio-block` Support for Multiple Descriptors](#virtio-block-support-for-multiple-descriptors) - [Memory Zones](#memory-zones) - [`Seccomp` Sandbox Improvements](#seccomp-sandbox-improvements) - [Preliminary KVM HyperV Emulation Control](#preliminary-kvm-hyperv-emulation-control) - - [Notable Bug Fixes](#notable-bug-fixes-32) - - [Contributors](#contributors-36) + - [Notable Bug Fixes](#notable-bug-fixes-33) + - [Contributors](#contributors-37) - [v0.9.0](#v090) - [`io_uring` Based Block Device Support](#io_uring-based-block-device-support) - [Block and Network Device Statistics](#block-and-network-device-statistics) @@ -303,17 +310,17 @@ - [Enhancements to ARM64 Support](#enhancements-to-arm64-support) - [Intel SGX Support](#intel-sgx-support) - [`Seccomp` Sandbox Improvements](#seccomp-sandbox-improvements-1) - - [Notable Bug Fixes](#notable-bug-fixes-33) - - [Contributors](#contributors-37) + - [Notable Bug Fixes](#notable-bug-fixes-34) + - [Contributors](#contributors-38) - [v0.8.0](#v080) - [Experimental Snapshot and Restore Support](#experimental-snapshot-and-restore-support) - [Experimental ARM64 Support](#experimental-arm64-support) - [Support for Using 5-level Paging in Guests](#support-for-using-5-level-paging-in-guests) - [Virtio Device Interrupt Suppression for Network Devices](#virtio-device-interrupt-suppression-for-network-devices) - [`vhost_user_fs` Improvements](#vhost_user_fs-improvements) - - [Notable Bug Fixes](#notable-bug-fixes-34) + - [Notable Bug Fixes](#notable-bug-fixes-35) - [Command Line and API Changes](#command-line-and-api-changes) - - [Contributors](#contributors-38) + - [Contributors](#contributors-39) - [v0.7.0](#v070) - [Block, Network, Persistent Memory (PMEM), VirtioFS and Vsock hotplug](#block-network-persistent-memory-pmem-virtiofs-and-vsock-hotplug) - [Alternative `libc` Support](#alternative-libc-support) @@ -323,14 +330,14 @@ - [`Seccomp` Sandboxing](#seccomp-sandboxing) - [Updated Distribution Support](#updated-distribution-support) - [Command Line and API Changes](#command-line-and-api-changes-1) - - [Contributors](#contributors-39) + - [Contributors](#contributors-40) - [v0.6.0](#v060) - [Directly Assigned Devices Hotplug](#directly-assigned-devices-hotplug) - [Shared Filesystem Improvements](#shared-filesystem-improvements) - [Block and Networking IO Self Offloading](#block-and-networking-io-self-offloading) - [Command Line Interface](#command-line-interface) - [PVH Boot](#pvh-boot) - - 
[Contributors](#contributors-40) + - [Contributors](#contributors-41) - [v0.5.1](#v051) - [v0.5.0](#v050) - [Virtual Machine Dynamic Resizing](#virtual-machine-dynamic-resizing) @@ -338,7 +345,7 @@ - [New Interrupt Management Framework](#new-interrupt-management-framework) - [Development Tools](#development-tools) - [Kata Containers Integration](#kata-containers-integration) - - [Contributors](#contributors-41) + - [Contributors](#contributors-42) - [v0.4.0](#v040) - [Dynamic virtual CPUs addition](#dynamic-virtual-cpus-addition) - [Programmatic firmware tables generation](#programmatic-firmware-tables-generation) @@ -347,7 +354,7 @@ - [Userspace IOAPIC by default](#userspace-ioapic-by-default) - [PCI BAR reprogramming](#pci-bar-reprogramming) - [New `cloud-hypervisor` organization](#new-cloud-hypervisor-organization) - - [Contributors](#contributors-42) + - [Contributors](#contributors-43) - [v0.3.0](#v030) - [Block device offloading](#block-device-offloading) - [Network device backend](#network-device-backend) @@ -374,6 +381,66 @@ - [Unit testing](#unit-testing) - [Integration tests parallelization](#integration-tests-parallelization) +# v47.0 + +This release has been tracked in [v47.0 +group](https://github.com/orgs/cloud-hypervisor/projects/6/views/4?filterQuery=release%3A%22Release+47%22) +of our [roadmap project](https://github.com/orgs/cloud-hypervisor/projects/6/). + +### Block Device Error Reporting to the Guest + +Instead of exiting on I/O errors, the `virtio-block` device now reports +errors to the guest using `VIRTIO_BLK_S_IOERR`. It improves the user +experience particularly when the guest rootfs is not backed by the +affected block device. (#7107) + +### Nice Error Messages on Exit + +We now have the chain of errors being reported and printed nicely, when +Cloud Hypervisor or ch-remote exits on errors. (#7066) + +### Alphabetically Sorted CLI Options for ch-remote + +To improve readability, ch-remote now prints help information in +alphabetical order. (#7130) + +### Notable Bug Fixes + +* Error out early when block device serial is too long (#7124) +* Fix partial commands being discarded for `virtio-vsock` (#7195) +* Disable the broken interrupt support for the `rtc_pl031` device to + prevent spurious guest interrupts (#7199) + +### Deprecations + +* A default IP (`192.168.249.1`) and mask (`255.255.255.0`) are + currently assigned to the `virtio-net` device if no value is specified + by users. Such behavior is now deprecated. Users of this behavior will + receive a warning message and should make adjustments. The behavior + will be removed in two release cycles (v49.0). + +### Contributors + +Many thanks to everyone who has contributed to our release: + +* Alyssa Ross +* Bo Chen +* Demi Marie Obenour +* Gauthier Jolly +* Hengqi Chen +* Jinank Jain +* Jinrong Liang +* Jean-Philippe Brucker +* Maximilian Güntner +* Muminul Islam +* Nuno Das Neves +* Philipp Schuster +* Ruoqing He +* Songqian Li +* Wei Liu +* Yi Wang +* ninollei + # v46.0 This release has been tracked in [v46.0 From 03b22a510d90d74d02cec54404545eb1110c5b9d Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Tue, 22 Jul 2025 19:08:07 +0000 Subject: [PATCH 030/294] tests: Disable 'test_virtio_block_dynamic_vhdx_expand' This issue is tracked via #7209. 
Signed-off-by: Bo Chen --- tests/integration.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration.rs b/tests/integration.rs index 0536864fd2..49eb6f0038 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -3381,6 +3381,7 @@ mod common_parallel { } #[test] + #[ignore = "See #7209"] fn test_virtio_block_dynamic_vhdx_expand() { const VIRTUAL_DISK_SIZE: u64 = 100 << 20; const EMPTY_VHDX_FILE_SIZE: u64 = 8 << 20; From 930a91186268f6e8ec60d628d0b4998bedc6172f Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 17 Jul 2025 23:15:50 +0000 Subject: [PATCH 031/294] block: Simplify AsyncAdaptor It shouldn't be necessary to lock the file for the adaptor. This removes two layers of indirection for QcowDiskSync and VhdxDiskSync. Signed-off-by: Wei Liu --- block/src/lib.rs | 40 +++++++++++++++++++--------------------- block/src/qcow/mod.rs | 2 +- block/src/qcow_sync.rs | 24 +++++++++--------------- block/src/vhdx_sync.rs | 20 +++++++------------- 4 files changed, 36 insertions(+), 50 deletions(-) diff --git a/block/src/lib.rs b/block/src/lib.rs index b081c87734..aed9c0ab3c 100644 --- a/block/src/lib.rs +++ b/block/src/lib.rs @@ -39,7 +39,7 @@ use std::io::{self, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write}; use std::os::linux::fs::MetadataExt; use std::os::unix::io::AsRawFd; use std::path::Path; -use std::sync::{Arc, MutexGuard}; +use std::sync::Arc; use std::time::Instant; use std::{cmp, result}; @@ -646,10 +646,7 @@ pub fn block_io_uring_is_supported() -> bool { } } -pub trait AsyncAdaptor -where - F: Read + Write + Seek, -{ +pub trait AsyncAdaptor { fn read_vectored_sync( &mut self, offset: libc::off_t, @@ -657,7 +654,10 @@ where user_data: u64, eventfd: &EventFd, completion_list: &mut VecDeque<(u64, i32)>, - ) -> AsyncIoResult<()> { + ) -> AsyncIoResult<()> + where + Self: Read + Seek, + { // Convert libc::iovec into IoSliceMut let mut slices: SmallVec<[IoSliceMut; DEFAULT_DESCRIPTOR_VEC_SIZE]> = SmallVec::with_capacity(iovecs.len()); @@ -669,15 +669,13 @@ where } let result = { - let mut file = self.file(); - // Move the cursor to the right offset - file.seek(SeekFrom::Start(offset as u64)) + self.seek(SeekFrom::Start(offset as u64)) .map_err(AsyncIoError::ReadVectored)?; let mut r = 0; for b in slices.iter_mut() { - r += file.read(b).map_err(AsyncIoError::ReadVectored)?; + r += self.read(b).map_err(AsyncIoError::ReadVectored)?; } r }; @@ -695,7 +693,10 @@ where user_data: u64, eventfd: &EventFd, completion_list: &mut VecDeque<(u64, i32)>, - ) -> AsyncIoResult<()> { + ) -> AsyncIoResult<()> + where + Self: Write + Seek, + { // Convert libc::iovec into IoSlice let mut slices: SmallVec<[IoSlice; DEFAULT_DESCRIPTOR_VEC_SIZE]> = SmallVec::with_capacity(iovecs.len()); @@ -707,15 +708,13 @@ where } let result = { - let mut file = self.file(); - // Move the cursor to the right offset - file.seek(SeekFrom::Start(offset as u64)) + self.seek(SeekFrom::Start(offset as u64)) .map_err(AsyncIoError::WriteVectored)?; let mut r = 0; for b in slices.iter() { - r += file.write(b).map_err(AsyncIoError::WriteVectored)?; + r += self.write(b).map_err(AsyncIoError::WriteVectored)?; } r }; @@ -731,12 +730,13 @@ where user_data: Option, eventfd: &EventFd, completion_list: &mut VecDeque<(u64, i32)>, - ) -> AsyncIoResult<()> { + ) -> AsyncIoResult<()> + where + Self: Write, + { let result: i32 = { - let mut file = self.file(); - // Flush - file.flush().map_err(AsyncIoError::Fsync)?; + self.flush().map_err(AsyncIoError::Fsync)?; 0 }; @@ -748,8 +748,6 @@ where Ok(()) } - - fn file(&mut self) -> 
MutexGuard<'_, F>; } pub enum ImageType { diff --git a/block/src/qcow/mod.rs b/block/src/qcow/mod.rs index 6d74232ddf..7733ff0ceb 100644 --- a/block/src/qcow/mod.rs +++ b/block/src/qcow/mod.rs @@ -425,7 +425,7 @@ fn max_refcount_clusters(refcount_order: u32, cluster_size: u32, num_clusters: u /// # Ok(()) /// # } /// ``` -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct QcowFile { raw_file: QcowRawFile, header: QcowHeader, diff --git a/block/src/qcow_sync.rs b/block/src/qcow_sync.rs index f07e245e01..332db24868 100644 --- a/block/src/qcow_sync.rs +++ b/block/src/qcow_sync.rs @@ -6,7 +6,6 @@ use std::collections::VecDeque; use std::fs::File; use std::io::{Seek, SeekFrom}; use std::os::fd::AsRawFd; -use std::sync::{Arc, Mutex, MutexGuard}; use vmm_sys_util::eventfd::EventFd; @@ -17,22 +16,22 @@ use crate::qcow::{QcowFile, RawFile, Result as QcowResult}; use crate::AsyncAdaptor; pub struct QcowDiskSync { - qcow_file: Arc>, + qcow_file: QcowFile, } impl QcowDiskSync { pub fn new(file: File, direct_io: bool) -> QcowResult { Ok(QcowDiskSync { - qcow_file: Arc::new(Mutex::new(QcowFile::from(RawFile::new(file, direct_io))?)), + qcow_file: QcowFile::from(RawFile::new(file, direct_io))?, }) } } impl DiskFile for QcowDiskSync { fn size(&mut self) -> DiskFileResult { - let mut file = self.qcow_file.lock().unwrap(); - - file.seek(SeekFrom::End(0)).map_err(DiskFileError::Size) + self.qcow_file + .seek(SeekFrom::End(0)) + .map_err(DiskFileError::Size) } fn new_async_io(&self, _ring_depth: u32) -> DiskFileResult> { @@ -40,19 +39,18 @@ impl DiskFile for QcowDiskSync { } fn fd(&mut self) -> BorrowedDiskFd<'_> { - let lock = self.qcow_file.lock().unwrap(); - BorrowedDiskFd::new(lock.as_raw_fd()) + BorrowedDiskFd::new(self.qcow_file.as_raw_fd()) } } pub struct QcowSync { - qcow_file: Arc>, + qcow_file: QcowFile, eventfd: EventFd, completion_list: VecDeque<(u64, i32)>, } impl QcowSync { - pub fn new(qcow_file: Arc>) -> Self { + pub fn new(qcow_file: QcowFile) -> Self { QcowSync { qcow_file, eventfd: EventFd::new(libc::EFD_NONBLOCK) @@ -62,11 +60,7 @@ impl QcowSync { } } -impl AsyncAdaptor for Arc> { - fn file(&mut self) -> MutexGuard<'_, QcowFile> { - self.lock().unwrap() - } -} +impl AsyncAdaptor for QcowFile {} impl AsyncIo for QcowSync { fn notifier(&self) -> &EventFd { diff --git a/block/src/vhdx_sync.rs b/block/src/vhdx_sync.rs index d832f5e3cc..fd5888e638 100644 --- a/block/src/vhdx_sync.rs +++ b/block/src/vhdx_sync.rs @@ -5,7 +5,6 @@ use std::collections::VecDeque; use std::fs::File; use std::os::fd::AsRawFd; -use std::sync::{Arc, Mutex, MutexGuard}; use vmm_sys_util::eventfd::EventFd; @@ -16,20 +15,20 @@ use crate::vhdx::{Result as VhdxResult, Vhdx}; use crate::AsyncAdaptor; pub struct VhdxDiskSync { - vhdx_file: Arc>, + vhdx_file: Vhdx, } impl VhdxDiskSync { pub fn new(f: File) -> VhdxResult { Ok(VhdxDiskSync { - vhdx_file: Arc::new(Mutex::new(Vhdx::new(f)?)), + vhdx_file: Vhdx::new(f)?, }) } } impl DiskFile for VhdxDiskSync { fn size(&mut self) -> DiskFileResult { - Ok(self.vhdx_file.lock().unwrap().virtual_disk_size()) + Ok(self.vhdx_file.virtual_disk_size()) } fn new_async_io(&self, _ring_depth: u32) -> DiskFileResult> { @@ -40,19 +39,18 @@ impl DiskFile for VhdxDiskSync { } fn fd(&mut self) -> BorrowedDiskFd<'_> { - let lock = self.vhdx_file.lock().unwrap(); - BorrowedDiskFd::new(lock.as_raw_fd()) + BorrowedDiskFd::new(self.vhdx_file.as_raw_fd()) } } pub struct VhdxSync { - vhdx_file: Arc>, + vhdx_file: Vhdx, eventfd: EventFd, completion_list: VecDeque<(u64, i32)>, } impl VhdxSync { - pub fn 
new(vhdx_file: Arc>) -> std::io::Result { + pub fn new(vhdx_file: Vhdx) -> std::io::Result { Ok(VhdxSync { vhdx_file, eventfd: EventFd::new(libc::EFD_NONBLOCK)?, @@ -61,11 +59,7 @@ impl VhdxSync { } } -impl AsyncAdaptor for Arc> { - fn file(&mut self) -> MutexGuard<'_, Vhdx> { - self.lock().unwrap() - } -} +impl AsyncAdaptor for Vhdx {} impl AsyncIo for VhdxSync { fn notifier(&self) -> &EventFd { From 6d70c570f35ce54cfabf51206f2870a8987217b3 Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Tue, 22 Jul 2025 10:01:55 +0200 Subject: [PATCH 032/294] tpm: Use the same anyhow version as other workspace members The other workspace members in the Cloud-hypervisor workspace currently declare anyhow version 1.0.94, but the tpm crate has an older version. This inconsistency is addressed by this PR which opens the door for declaring anyhow as a workspace dependency. Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- tpm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpm/Cargo.toml b/tpm/Cargo.toml index ab257ec189..0b50049943 100644 --- a/tpm/Cargo.toml +++ b/tpm/Cargo.toml @@ -6,7 +6,7 @@ name = "tpm" version = "0.1.0" [dependencies] -anyhow = "1.0.81" +anyhow = "1.0.94" libc = "0.2.153" log = "0.4.21" net_gen = { path = "../net_gen" } From 2ae5b2567b40a782d3b18c33ca3f0da7139b4829 Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Wed, 23 Jul 2025 21:01:07 +0200 Subject: [PATCH 033/294] tpm: Use the same libc crate version as other workspace members The other workspace members in the Cloud-hypervisor workspace currently declare libc crate version 0.2.167, but the tpm crate has an older version. This inconsistency is addressed by this PR which opens the door for declaring libc as a workspace dependency. Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- tpm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpm/Cargo.toml b/tpm/Cargo.toml index 0b50049943..21cb222d5a 100644 --- a/tpm/Cargo.toml +++ b/tpm/Cargo.toml @@ -7,7 +7,7 @@ version = "0.1.0" [dependencies] anyhow = "1.0.94" -libc = "0.2.153" +libc = "0.2.167" log = "0.4.21" net_gen = { path = "../net_gen" } thiserror = { workspace = true } From f6568042ce530d3a46ba267cd00ba2d7e7220075 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 23 Jul 2025 18:51:22 +0000 Subject: [PATCH 034/294] tests: Fix an error message in VHDX expansion test The VHDX image is generated directly, not converted from a RAW image. Signed-off-by: Wei Liu --- tests/integration.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration.rs b/tests/integration.rs index 49eb6f0038..da2d5029c9 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -3402,7 +3402,7 @@ mod common_parallel { .arg(vhdx_path) .arg(VIRTUAL_DISK_SIZE.to_string()) .output() - .expect("Expect generating dynamic VHDx image from RAW image"); + .expect("Expect generating dynamic VHDX image"); // Check if the size matches with empty VHDx file size assert_eq!(vhdx_image_size(vhdx_path), EMPTY_VHDX_FILE_SIZE); From 4cae96f0706508d8abc074f903658be220295243 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 23 Jul 2025 18:58:26 +0000 Subject: [PATCH 035/294] tests: Avoid races in the VHDX expansion test Generate the data disk under a temporary directory so that multiple instances of the test suites can run at the same time. 
Signed-off-by: Wei Liu --- tests/integration.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/integration.rs b/tests/integration.rs index da2d5029c9..62cbea9dea 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -3388,12 +3388,12 @@ mod common_parallel { const FULL_VHDX_FILE_SIZE: u64 = 112 << 20; const DYNAMIC_VHDX_NAME: &str = "dynamic.vhdx"; - let mut workload_path = dirs::home_dir().unwrap(); - workload_path.push("workloads"); + let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string()); + let guest = Guest::new(Box::new(focal)); + let kernel_path = direct_kernel_boot_path(); - let mut vhdx_file_path = workload_path; - vhdx_file_path.push(DYNAMIC_VHDX_NAME); - let vhdx_path = vhdx_file_path.to_str().unwrap(); + let vhdx_pathbuf = guest.tmp_dir.as_path().join(DYNAMIC_VHDX_NAME); + let vhdx_path = vhdx_pathbuf.to_str().unwrap(); // Generate a 100 MiB dynamic VHDX file std::process::Command::new("qemu-img") @@ -3407,10 +3407,6 @@ mod common_parallel { // Check if the size matches with empty VHDx file size assert_eq!(vhdx_image_size(vhdx_path), EMPTY_VHDX_FILE_SIZE); - let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string()); - let guest = Guest::new(Box::new(focal)); - let kernel_path = direct_kernel_boot_path(); - let mut cloud_child = GuestCommand::new(&guest) .args(["--cpus", "boot=1"]) .args(["--memory", "size=512M"]) From 2d9fc3beb63dc4fe72f478ea65bcc2cf4fe73e2e Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 23 Jul 2025 19:29:58 +0000 Subject: [PATCH 036/294] tests: Reenable test_virtio_block_dynamic_vhdx_expand Signed-off-by: Wei Liu --- tests/integration.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration.rs b/tests/integration.rs index 62cbea9dea..d754ccff8a 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -3381,7 +3381,6 @@ mod common_parallel { } #[test] - #[ignore = "See #7209"] fn test_virtio_block_dynamic_vhdx_expand() { const VIRTUAL_DISK_SIZE: u64 = 100 << 20; const EMPTY_VHDX_FILE_SIZE: u64 = 8 << 20; From 5f2392c0958f719b895b23cbb9ca4975953d0fb0 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 23 Jul 2025 22:37:19 +0000 Subject: [PATCH 037/294] tests: Avoid repeatedly downloading files from GitHub Running one or two tests in a tight loop can cause the download functions to quickly hit GitHub's API rate limit. That causes the test script to fail for no apparent reason. Signed-off-by: Wei Liu --- scripts/run_integration_tests_x86_64.sh | 8 ++++++-- scripts/sha1sums-x86_64 | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/run_integration_tests_x86_64.sh b/scripts/run_integration_tests_x86_64.sh index 11f5d6664a..4fec7d04c9 100755 --- a/scripts/run_integration_tests_x86_64.sh +++ b/scripts/run_integration_tests_x86_64.sh @@ -20,9 +20,13 @@ fi cp scripts/sha1sums-x86_64 "$WORKLOADS_DIR" -download_hypervisor_fw +if [ ! -f "$WORKLOADS_DIR/hypervisor-fw" ]; then + download_hypervisor_fw +fi -download_ovmf +if [ ! 
-f "$WORKLOADS_DIR/CLOUDHV.fd" ]; then + download_ovmf +fi FOCAL_OS_IMAGE_NAME="focal-server-cloudimg-amd64-custom-20210609-0.qcow2" FOCAL_OS_IMAGE_URL="https://ch-images.azureedge.net/$FOCAL_OS_IMAGE_NAME" diff --git a/scripts/sha1sums-x86_64 b/scripts/sha1sums-x86_64 index 7fd5c56d87..e198816035 100644 --- a/scripts/sha1sums-x86_64 +++ b/scripts/sha1sums-x86_64 @@ -3,3 +3,5 @@ f1eccdc5e1b515dbad294426ab081b47ebfb97c0 focal-server-cloudimg-amd64-custom-2021 7f5a8358243a96adf61f5c20139b29f308f2c0e3 focal-server-cloudimg-amd64-custom-20210609-0.raw 5f10738920efb74f0bf854cadcd1b1fd544e49c8 jammy-server-cloudimg-amd64-custom-20241017-0.qcow2 c1dfbe7abde400e675844568dbe9d3914222f6de jammy-server-cloudimg-amd64-custom-20241017-0.raw +540ac358429305d7aa94e15363665d1c9d845982 hypervisor-fw +4e96fd0914a44005d40707b2b0c7e829e4086bd5 CLOUDHV.fd From 423280fb96662d7866cc8f36f3b90c59bc5fc193 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 22 Jul 2025 23:18:29 +0000 Subject: [PATCH 038/294] build: Bump enumflags2 from 0.7.10 to 0.7.12 Bumps [enumflags2](https://github.com/meithecatte/enumflags2) from 0.7.10 to 0.7.12. - [Release notes](https://github.com/meithecatte/enumflags2/releases) - [Commits](https://github.com/meithecatte/enumflags2/compare/v0.7.10...v0.7.12) --- updated-dependencies: - dependency-name: enumflags2 dependency-version: 0.7.12 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 305e31cee0..c67d21cb2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -590,9 +590,9 @@ checksum = "a3d8a32ae18130a3c84dd492d4215c3d913c3b07c6b63c2eb3eb7ff1101ab7bf" [[package]] name = "enumflags2" -version = "0.7.10" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d232db7f5956f3f14313dc2f87985c58bd2c695ce124c8cdd984e08e15ac133d" +checksum = "1027f7680c853e056ebcec683615fb6fbbc07dbaa13b4d5d9442b146ded4ecef" dependencies = [ "enumflags2_derive", "serde", @@ -600,9 +600,9 @@ dependencies = [ [[package]] name = "enumflags2_derive" -version = "0.7.10" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de0d48a183585823424a4ce1aa132d174a6a81bd540895822eb4c8373a8e49e8" +checksum = "67c78a4d8fdf9953a5c9d458f9efe940fd97a0cab0941c075a813ac594733827" dependencies = [ "proc-macro2", "quote", From 76d8d47f6acd22c364f9dd8a9ffd3c9ad645401c Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 17 Jul 2025 00:47:04 +0000 Subject: [PATCH 039/294] performance-metrics: Initialize tests after setting overrides No functional change. 
Signed-off-by: Wei Liu --- performance-metrics/src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/performance-metrics/src/main.rs b/performance-metrics/src/main.rs index e029b10d8d..8c1b669bbf 100644 --- a/performance-metrics/src/main.rs +++ b/performance-metrics/src/main.rs @@ -712,8 +712,6 @@ fn main() { // Run performance tests sequentially and report results (in both readable/json format) let mut metrics_report: MetricsReport = Default::default(); - init_tests(); - let overrides = Arc::new(PerformanceTestOverrides { test_iterations: cmd_arguments .get_one::("iterations") @@ -727,6 +725,8 @@ fn main() { .unwrap_or_default(), }); + init_tests(); + for test in test_list.iter() { if test_filter.is_empty() || test_filter.iter().any(|&s| test.name.contains(s)) { match run_test_with_timeout(test, &overrides) { From 8a26380657fae54f5ce0ead68fb61737414151e2 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 17 Jul 2025 00:20:24 +0000 Subject: [PATCH 040/294] performance-metrics: Add all supported formats to block tests Signed-off-by: Wei Liu --- performance-metrics/src/main.rs | 55 +++++++++++++++++++- performance-metrics/src/performance_tests.rs | 32 ++++++++---- 2 files changed, 77 insertions(+), 10 deletions(-) diff --git a/performance-metrics/src/main.rs b/performance-metrics/src/main.rs index 8c1b669bbf..c348155817 100644 --- a/performance-metrics/src/main.rs +++ b/performance-metrics/src/main.rs @@ -106,10 +106,45 @@ impl Default for MetricsReport { } } +#[derive(Clone, Copy, Default)] +pub enum ImageFormat { + #[default] + Raw, + Qcow2, + Vhd, + Vhdx, +} + +impl std::str::FromStr for ImageFormat { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "raw" => Ok(ImageFormat::Raw), + "qcow2" => Ok(ImageFormat::Qcow2), + "vhd" => Ok(ImageFormat::Vhd), + "vhdx" => Ok(ImageFormat::Vhdx), + _ => Err(()), + } + } +} + +impl fmt::Display for ImageFormat { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ImageFormat::Raw => write!(f, "raw"), + ImageFormat::Qcow2 => write!(f, "qcow2"), + ImageFormat::Vhd => write!(f, "vhd"), + ImageFormat::Vhdx => write!(f, "vhdx"), + } + } +} + #[derive(Default)] pub struct PerformanceTestOverrides { test_iterations: Option, test_timeout: Option, + test_image_format: Option, } impl fmt::Display for PerformanceTestOverrides { @@ -121,6 +156,10 @@ impl fmt::Display for PerformanceTestOverrides { write!(f, "test_timeout = {test_timeout}")?; } + if let Some(test_image_format) = self.test_image_format { + write!(f, "test_image_format = {test_image_format}")?; + } + Ok(()) } } @@ -686,6 +725,15 @@ fn main() { .help("Override test timeout, Ex. --timeout 5") .num_args(1), ) + .arg( + Arg::new("image-format") + .long("image-format") + .help( + "Override the image format used for block tests, supported values: qcow2, raw, vhd, vhdx. 
\ + Default is 'raw'.", + ) + .num_args(1), + ) .get_matches(); // It seems that the tool (ethr) used for testing the virtio-net latency @@ -723,9 +771,14 @@ fn main() { .map(|s| s.parse()) .transpose() .unwrap_or_default(), + test_image_format: cmd_arguments + .get_one::("image-format") + .map(|s| s.parse()) + .transpose() + .unwrap_or_default(), }); - init_tests(); + init_tests(&overrides); for test in test_list.iter() { if test_filter.is_empty() || test_filter.iter().any(|&s| test.name.contains(s)) { diff --git a/performance-metrics/src/performance_tests.rs b/performance-metrics/src/performance_tests.rs index a2d7004900..46eb090fcc 100644 --- a/performance-metrics/src/performance_tests.rs +++ b/performance-metrics/src/performance_tests.rs @@ -12,7 +12,7 @@ use std::{fs, thread}; use test_infra::{Error as InfraError, *}; use thiserror::Error; -use crate::{mean, PerformanceTestControl}; +use crate::{mean, ImageFormat, PerformanceTestControl, PerformanceTestOverrides}; #[cfg(target_arch = "x86_64")] pub const FOCAL_IMAGE_NAME: &str = "focal-server-cloudimg-amd64-custom-20210609-0.raw"; @@ -30,16 +30,30 @@ enum Error { RestoreTimeParse, } +// The test image cannot be created on tmpfs (e.g. /tmp) filesystem, +// as tmpfs does not support O_DIRECT const BLK_IO_TEST_IMG: &str = "/var/tmp/ch-blk-io-test.img"; -pub fn init_tests() { - // The test image cannot be created on tmpfs (e.g. /tmp) filesystem, - // as tmpfs does not support O_DIRECT - assert!(exec_host_command_output(&format!( - "dd if=/dev/zero of={BLK_IO_TEST_IMG} bs=1M count=4096" - )) - .status - .success()); +pub fn init_tests(overrides: &PerformanceTestOverrides) { + let mut cmd = format!("dd if=/dev/zero of={BLK_IO_TEST_IMG} bs=1M count=4096"); + + if let Some(o) = overrides.test_image_format { + match o { + ImageFormat::Raw => { /* Nothing to do */ } + ImageFormat::Qcow2 => { + cmd = + format!("qemu-img create -f qcow2 -o preallocation=full {BLK_IO_TEST_IMG} 4G"); + } + ImageFormat::Vhd => { + cmd = format!("qemu-img create -f vpc -o subformat=fixed {BLK_IO_TEST_IMG} 4G"); + } + ImageFormat::Vhdx => { + cmd = format!("qemu-img create -f vhdx -o subformat=fixed {BLK_IO_TEST_IMG} 4G"); + } + } + } + + assert!(exec_host_command_output(&cmd).status.success()); } pub fn cleanup_tests() { From 511a100842664ffcfba34bf215de3dec80f3d216 Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Wed, 23 Jul 2025 20:56:19 +0200 Subject: [PATCH 041/294] tpm: Use the same log version as other workspace members The other workspace members in the Cloud-hypervisor workspace currently declare log version 0.4.22, but the tpm crate has an older version. This inconsistency is addressed by this PR which opens the door for declaring log as a workspace dependency. 
Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- tpm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpm/Cargo.toml b/tpm/Cargo.toml index 21cb222d5a..fa3461245d 100644 --- a/tpm/Cargo.toml +++ b/tpm/Cargo.toml @@ -8,7 +8,7 @@ version = "0.1.0" [dependencies] anyhow = "1.0.94" libc = "0.2.167" -log = "0.4.21" +log = "0.4.22" net_gen = { path = "../net_gen" } thiserror = { workspace = true } vmm-sys-util = { workspace = true } From 985dcfbf3e82a699d45ba1b93519d40faa7f4b69 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 24 Jul 2025 23:53:17 +0000 Subject: [PATCH 042/294] build: Bump async-signal from 0.2.11 to 0.2.12 Bumps [async-signal](https://github.com/smol-rs/async-signal) from 0.2.11 to 0.2.12. - [Release notes](https://github.com/smol-rs/async-signal/releases) - [Changelog](https://github.com/smol-rs/async-signal/blob/master/CHANGELOG.md) - [Commits](https://github.com/smol-rs/async-signal/compare/v0.2.11...v0.2.12) --- updated-dependencies: - dependency-name: async-signal dependency-version: 0.2.12 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c67d21cb2f..b58a6c4563 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -221,9 +221,9 @@ dependencies = [ [[package]] name = "async-signal" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7605a4e50d4b06df3898d5a70bf5fde51ed9059b0434b73105193bc27acce0d" +checksum = "f567af260ef69e1d52c2b560ce0ea230763e6fbb9214a85d768760a920e3e3c1" dependencies = [ "async-io", "async-lock", @@ -234,7 +234,7 @@ dependencies = [ "rustix 1.0.7", "signal-hook-registry", "slab", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -2644,6 +2644,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.2", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -2668,13 +2677,29 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -2687,6 +2712,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -2699,6 +2730,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -2711,12 +2748,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -2729,6 +2778,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -2741,6 +2796,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -2753,6 +2814,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -2765,6 +2832,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "winnow" version = "0.7.2" From 2c3e6cd93d66782ece23b307e52ca604ca7805eb Mon Sep 17 00:00:00 2001 From: Lucas Grosche Date: Wed, 16 Jul 2025 16:24:10 +0200 Subject: [PATCH 043/294] docs: Add Windows 11 support information The current documentation only mentions Windows server support. 
Since only minimal adjustments are needed for Windows 11 support, these were added where applicable. Also contains whitespace fixes, adds missing `\` in example code and adds more cross references. Signed-off-by: Lucas Grosche --- docs/windows.md | 134 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 106 insertions(+), 28 deletions(-) diff --git a/docs/windows.md b/docs/windows.md index e4945d939b..ebb902fbfa 100644 --- a/docs/windows.md +++ b/docs/windows.md @@ -4,10 +4,10 @@ Starting with the release version [0.10.0](https://github.com/cloud-hypervisor/c __Requirements__ -- Host with KVM enabled +- Host with KVM enabled - [UEFI](uefi.md) capable Windows guest image with Virtio drivers integrated -Any modern Windows Server version is compatible. Cloud Hypervisor has been successfully tested with Windows Server 2019 and Windows Server Core 2004. +Any modern Windows Server version is compatible, as well as Windows 11. Cloud Hypervisor has been successfully tested with Windows Server 2019, Windows Server Core 2004 and Windows 11 IoT Enterprise LTSC 2024. At the current stage, only UEFI capable Windows images are supported. This implies the presence of the OVMF firmware during the Windows installation and in any subsequent usage. BIOS boot is not supported. @@ -20,10 +20,15 @@ The subsequent sections will tell, in detail, how to prepare an appropriate Wind __Prerequisites__ - QEMU, version >=5.0.0 is recommended. -- Windows installation ISO. Obtained through MSDN, Visual Studio subscription, evaluation center, etc. +- Windows installation ISO. Obtained through MSDN, Visual Studio subscription, evaluation center, etc. - [VirtIO driver ISO](https://fedorapeople.org/groups/virt/virtio-win/direct-downloads/stable-virtio/) + - Please use the [VirtIO Windows 11 attestation file](https://fedorapeople.org/groups/virt/virtio-win/direct-downloads/upstream-virtio/virtio-win11-attestation-0.1-258.zip) + for Windows 11 - Suitable firmware for Cloud Hypervisor (`CLOUDHV.fd`) and for QEMU (`OVMF.fd`) -- With the suggested image size of 30G, there should be enough free disk space to hold the installation ISO and any other necessary files +- With the suggested image size of 30G for Windows Server, there should be enough free disk space to hold the installation ISO and any other necessary files + - For Windows 11, increasing this image size to 64GB is recommended (see [minimal requirements](https://support.microsoft.com/en-us/windows/windows-11-system-requirements-86c11283-ea52-4782-9efd-7674389a7ba3)) +- Windows 11 only: TPM 2.0 support +- Windows 11 only: 2 or more cores This step currently requires QEMU to install Windows onto the guest. QEMU is only used at the preparation stage, the resulting image is then fully functional with Cloud Hypervisor. @@ -37,11 +42,13 @@ OVMF_DIR=./FV ``` Create an empty image file, `raw` is supported. + ```shell qemu-img create -f raw $IMG_FILE 30G ``` -Begin the Windows installation process under QEMU +Begin the Windows installation process under QEMU for Windows Server: + ```shell qemu-system-x86_64 \ -machine q35,accel=kvm \ @@ -57,13 +64,56 @@ qemu-system-x86_64 \ -vga std ``` -Before the installation can proceed, point the Windows installation program to the VirtIO disk and install the necessary storage controller drivers. After that, the attached hard drive will become visible and the actual installation can commence. 
+For Windows 11 you can use `swtpm` to fulfill the TPM 2.0 requirement: + +```shell +# Create directory to store state +mkdir -p /tmp/mytpm1 +# Start swtpm daemon for TPM 2.0 support +swtpm socket \ + --tpm2 \ + --ctrl type=unixio,path=/tmp/swtpm-sock \ + --tpmstate dir=/tmp/mytpm1 \ + --flags startup-clear \ + --log level=20 \ + --log file=/tmp/swtpm.log \ + --daemon +``` + +Begin the Windows 11 installation process under QEMU like this: + +```shell +qemu-system-x86_64 \ + -machine q35,accel=kvm \ + -cpu host \ + -m 4G \ + -bios ./$OVMF_DIR/OVMF.fd \ + -cdrom ./$WIN_ISO_FILE \ + -drive file=./$VIRTIO_ISO_FILE,index=0,media=cdrom \ + -drive if=none,id=root,file=./$IMG_FILE \ + -device virtio-blk-pci,drive=root,disable-legacy=on \ + -device virtio-net-pci,netdev=mynet0,disable-legacy=on \ + -netdev user,id=mynet0 \ + -vga std \ + -smp 4 \ + -chardev socket,id=chrtpm,path=/tmp/swtpm-sock \ + -tpmdev emulator,id=tpm0,chardev=chrtpm \ + -device tpm-tis,tpmdev=tpm0 +``` + +This command needs at least `-smp 2` (2 cores), as well as the last three lines (TPM 2.0), to support Windows 11 minimal requirements. Additionally, using `OVMF_CODE.fd` leads to the following error: `qemu: could not load PC BIOS '././FV/OVMF_CODE.fd'`. Switching to `OVMF.fd` is therefore necessary. + +For more details about TPM specifically, please continue with the [TPM documentation](./tpm.md). + +Before the installation can proceed, point the Windows installation program to the VirtIO disk and install the necessary storage controller drivers. For Windows 11 with the attestation drivers, you need to navigate to the `viostor` directory to be able to see and install it. After that, the attached hard drive will become visible and the actual installation can commence. -After the installation has completed, proceed further to the configuration section. QEMU will be needed at least once more to enable the Windows Special Administration Console (SAC) and to possibly install extra device drivers. +Do not install network drivers for Windows 11 just yet, if you don't want to be forced to log-in to/create a Microsoft account. Simply select `I don't have internet` for now. + +After the installation has completed, proceed further to the [configuration section](#image-configuration). QEMU will be needed at least once more to enable/install the Windows Special Administration Console (SAC) and to possibly install extra device drivers. ## Image Usage -The basic command to boot a Windows image. The configuration section should be checked before executing it for the first time. +The basic command to boot a Windows image is shown in the next code snippet. The [configuration section](#image-configuration), as well as the [Getting Started section](../README.md#2-getting-started) should be checked before executing it for the first time. Please especially read the documentation for giving the cloud-hypervisor binary the correct capabilities for it to set TAP interfaces up on the host, otherwise the command below will fail: ```shell cloud-hypervisor \ @@ -85,19 +135,25 @@ In cases where the host processor supports address space > 39 bits, it might be To daemonize the Cloud Hypervisor process, `nohup` can be used. Some STDIO redirections might need to be done. In a simple case it is sufficient to just redirect all the output to `/dev/null`. +Be aware, currently, running the Windows 11 VM on Cloud Hypervisor with TPM 2.0 was not proven successful: `thread 'vcpu0' panicked`. Running the VM without TPM is a valid option though. 
Therefore the command as shown above is also valid for a Windows 11 VM. + ## Image Configuration ### Device Drivers After the Windows installation has finished under QEMU, there might be still devices with no drivers installed. This might happen for example, when a device was not used during the installation. In particular it is important to ensure that the VirtIO network device is setup correctly because further steps for the configuration and the usage require network in most case. -Boot once more under QEMU and use the [Device Manager](https://support.microsoft.com/en-in/help/4028443/windows-10-update-drivers), to ensure all the device drivers, and especially the network card, are installed correctly. Also, as Cloud Hypervisor can introduce new devices, it is advisable to repeat the procedure while booted under Cloud Hypervisor, when the RDP access to the image is functional. +Boot once more under QEMU and use the [Device Manager](https://support.microsoft.com/en-in/help/4028443/windows-10-update-drivers), to ensure all the device drivers, and especially the network card, are installed correctly. If not, right click on the unknown network device, choose `Update driver` and browse to the `NetKvm` directory on the CD. + +Also, as Cloud Hypervisor can introduce new devices, it is advisable to repeat the procedure while booted under Cloud Hypervisor, when the [RDP](#remote-desktop-protocol-rdp-enablement) access to the image is functional. ### Windows Special Administration Console (SAC) enablement SAC provides a text based console access to the Windows guest. As Cloud Hypervisor doesn't implement a VGA adaptor, SAC is an important instrument for the Windows guest management. -Boot the Windows image under QEMU and execute the below commands to permanently enable SAC +Boot the Windows image under QEMU. For all non-server Windows versions, the SAC needs to be downloaded and enabled first in the `Optional features` menu of Windows. + +Execute the below commands to permanently enable SAC. You might need admin privileges. ```cmd bcdedit /emssettings emsport:1 emsbaudrate:115200 @@ -105,15 +161,14 @@ bcdedit /ems on bcdedit /bootems on ``` -Once SAC is enabled, the image can be booted under Cloud Hypervisor. The SAC prompt will show up +Once SAC is enabled, the image can be booted under Cloud Hypervisor. The SAC prompt will show up
-Computer is booting, SAC started and initialized.                               
-                                                                                
-Use the "ch -?" command for information about using channels.                   
-Use the "?" command for general help.                                           
-                                                                                
-                                                                                
+Computer is booting, SAC started and initialized.
+
+Use the "ch -?" command for information about using channels.
+Use the "?" command for general help.
+
 SAC>
 
@@ -139,7 +194,7 @@ As the simplest option, using `--net tap=` in the Cloud Hypervisor command line
 SAC>i 10 192.168.249.2 255.255.255.0 192.168.249.1
-
+ Where `10` is the device index as shown by the `i` command. @@ -149,26 +204,38 @@ Additional steps are necessary to provide the guest with internet access. - On the guest, add the DNS server either by using `netsh` or by opening `Network and Connectivity Center` and editing the adapter properties. - On the host, configure the traffic forwarding. Replace the `NET_DEV` with the name of your network device. + ```shell NET_DEV=wlp3s0 sysctl -w net.ipv4.ip_forward=1 iptables -t nat -A POSTROUTING -o $NET_DEV -j MASQUERADE ``` +If needed, you can also allow ICMP from host to guest via the following command executed on the guest: + +```shell +netsh advfirewall firewall add rule name="Allow ICMPv4" protocol=icmpv4:8,any dir=in action=allow +``` + +This will enable simple `ping` requests from your host to the guest. + ### Remote Desktop Protocol (RDP) enablement #### Using QEMU - - Execute `SystemPropertiesRemote` - - In the properties window, choose "Allow remote connections to this computer" - - Click "Select Users" and add some user to the allow list + +- Execute `SystemPropertiesRemote` +- In the properties window, choose "Allow remote connections to this computer" +- Click "Select Users" and add some user to the allow list + #### Using powershell + ```powershell Set-ItemProperty "HKLM:\SYSTEM\CurrentControlSet\Control\Terminal Server\" -Name "fDenyTSConnections" -Value 0 Enable-NetFirewallRule -DisplayGroup "Remote Desktop" Add-LocalGroupMember -Group "Remote Desktop Users" -Member someuser ``` - -Administrators can always RDP, non administrator users have to be explicitly enabled. + +Administrators can always RDP, non administrator users have to be explicitly enabled. Once the configuration is set, RDP clients can connect to `192.168.249.2`. @@ -182,7 +249,15 @@ Start-Service sshd Set-Service -Name sshd -StartupType ‘Automatic’ ``` -This allows for SSH login from a remote machine, for example through the `administrator` user: `ssh administrator@192.168.249.2`. For a more detailed OpenSSH guide, please follow the MSDN article from the [links](#links) section. +This allows for SSH login from a remote machine, for example through the `administrator` user: `ssh administrator@192.168.249.2`. + +On Windows 11, opening the firewall was needed as well: + +```powershell +New-NetFirewallRule -Name sshd -DisplayName "OpenSSH Server" -Enabled True -Direction Inbound -Protocol TCP -Action Allow -LocalPort 22 +``` + +For a more detailed OpenSSH guide, please follow the MSDN article from the [links](#links) section. ## Hotplug capability @@ -196,6 +271,8 @@ Disk hotplug and hot-remove are supported. After the device has been hotplugged, ## Debugging +Disclaimer: This chapter was not verified on Windows 11 yet. Proceed with care. + The Windows guest debugging process relies heavily on QEMU and [socat](http://www.dest-unreach.org/socat/). The procedure requires two Windows VMs: - A debugger VM running under QEMU. @@ -203,7 +280,7 @@ The Windows guest debugging process relies heavily on QEMU and [socat](http://ww The connection between both guests happens over TCP, whereby on the guest side it is automatically translated to a COM port. Because the VMs are connected through TCP, the debugging infrastructure can be distributed over the network. The serial port, while slowly transferring data, is common enough to support a wide range of cases and tools. -In this exercise, [WinDbg](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/) is used. 
Any other debugger of choice with the ability to use serial connection can be used instead. +In this exercise, [WinDbg](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/) is used. Any other debugger of choice with the ability to use serial connection can be used instead. ### Debugger and Debuggee @@ -220,7 +297,7 @@ qemu-system-x86_64 \ -smp 1 \ -m 4G \ -cdrom ./$WIN_ISO_FILE \ - -drive file=./$VIRTIO_ISO_FILE,index=0,media=cdrom + -drive file=./$VIRTIO_ISO_FILE,index=0,media=cdrom \ -drive if=none,id=root,file=./windbg-disk.raw \ -device virtio-blk-pci,drive=root,disable-legacy=on \ -device virtio-net-pci,netdev=mynet0,disable-legacy=on \ @@ -256,7 +333,7 @@ bcdedit /debug on bcdedit /bootdebug on ``` -##### Turn on boot manager debug +##### Turn on boot manager debug ```cmd bcdedit /set {bootmgr} bootdebug on @@ -308,6 +385,7 @@ Once started, WinDbg will wait for an incoming connection which is going to be i ##### Under QEMU Essentially it would be the command like depicted in the guest preparation sections, with a few modifications: + ```shell qemu-system-x86_64 \ -machine q35,accel=kvm \ @@ -315,7 +393,7 @@ qemu-system-x86_64 \ -m 4G \ -bios ./$OVMF_DIR/OVMF_CODE.fd \ -cdrom ./$WIN_ISO_FILE \ - -drive file=./$VIRTIO_ISO_FILE,index=0,media=cdrom + -drive file=./$VIRTIO_ISO_FILE,index=0,media=cdrom \ -drive if=none,id=root,file=./$IMG_FILE \ -device virtio-blk-pci,drive=root,disable-legacy=on \ -device virtio-net-pci,netdev=mynet0,disable-legacy=on \ From b0bf889d586bfbf1f3ab9fe30fc0c25a08067fbc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 25 Jul 2025 23:19:14 +0000 Subject: [PATCH 044/294] build: Bump serde_with from 3.9.0 to 3.14.0 Bumps [serde_with](https://github.com/jonasbb/serde_with) from 3.9.0 to 3.14.0. - [Release notes](https://github.com/jonasbb/serde_with/releases) - [Commits](https://github.com/jonasbb/serde_with/compare/v3.9.0...v3.14.0) --- updated-dependencies: - dependency-name: serde_with dependency-version: 3.14.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 10 +++++----- hypervisor/Cargo.toml | 2 +- virtio-devices/Cargo.toml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b58a6c4563..7e07c7551b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -579,7 +579,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -1924,9 +1924,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.9.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cecfa94848272156ea67b2b1a53f20fc7bc638c4a46d2f8abde08f05f4b857" +checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" dependencies = [ "serde", "serde_derive", @@ -1935,9 +1935,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.9.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8fee4991ef4f274617a51ad4af30519438dacb2f56ac773b08a1922ff743350" +checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" dependencies = [ "darling", "proc-macro2", diff --git a/hypervisor/Cargo.toml b/hypervisor/Cargo.toml index 60f53c7d4d..9a254dd940 100644 --- a/hypervisor/Cargo.toml +++ b/hypervisor/Cargo.toml @@ -33,7 +33,7 @@ mshv-ioctls = { workspace = true, optional = true } open-enum = "0.5.2" serde = { version = "1.0.208", features = ["derive", "rc"] } serde_json = { workspace = true } -serde_with = { version = "3.9.0", default-features = false, features = [ +serde_with = { version = "3.14.0", default-features = false, features = [ "macros", ] } thiserror = { workspace = true } diff --git a/virtio-devices/Cargo.toml b/virtio-devices/Cargo.toml index a4c70d111f..9bf2efeca5 100644 --- a/virtio-devices/Cargo.toml +++ b/virtio-devices/Cargo.toml @@ -24,7 +24,7 @@ pci = { path = "../pci" } rate_limiter = { path = "../rate_limiter" } seccompiler = { workspace = true } serde = { version = "1.0.208", features = ["derive"] } -serde_with = { version = "3.9.0", default-features = false, features = [ +serde_with = { version = "3.14.0", default-features = false, features = [ "macros", ] } serial_buffer = { path = "../serial_buffer" } From 8c136041cbdf2879584f3ca3eb5d8371e9435680 Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Fri, 25 Jul 2025 10:27:22 +0200 Subject: [PATCH 045/294] build: Use workspace dependencies Many of the workspace members in the Cloud-hypervisor workspace share common dependencies. Making these workspace dependencies reduces duplication and improves maintainability. 
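Concretely, the Cargo mechanism this relies on is a shared dependency table in the
workspace root plus an opt-in in each member manifest. A minimal sketch, assuming the
root entries touched by this series (e.g. `libc`, `log`) live under the standard
`[workspace.dependencies]` table:

```toml
# Workspace root Cargo.toml: declare the version once.
[workspace.dependencies]
libc = "0.2.167"
log = "0.4.22"

# A member crate (e.g. tpm/Cargo.toml): inherit it instead of pinning a version.
[dependencies]
libc = { workspace = true }
log = { workspace = true }
```

With this in place, bumping a shared crate means editing only the workspace manifest
rather than every member.
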
Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- Cargo.toml | 33 ++++++++++++++++++++++++--------- arch/Cargo.toml | 10 +++++----- block/Cargo.toml | 8 ++++---- devices/Cargo.toml | 12 ++++++------ event_monitor/Cargo.toml | 6 +++--- hypervisor/Cargo.toml | 14 +++++++------- net_util/Cargo.toml | 8 ++++---- pci/Cargo.toml | 10 +++++----- performance-metrics/Cargo.toml | 6 +++--- rate_limiter/Cargo.toml | 6 +++--- test_infra/Cargo.toml | 8 ++++---- tpm/Cargo.toml | 6 +++--- tracer/Cargo.toml | 6 +++--- vhost_user_block/Cargo.toml | 6 +++--- vhost_user_net/Cargo.toml | 8 ++++---- virtio-devices/Cargo.toml | 14 +++++++------- vm-allocator/Cargo.toml | 2 +- vm-device/Cargo.toml | 2 +- vm-migration/Cargo.toml | 4 ++-- vmm/Cargo.toml | 22 +++++++++++----------- 20 files changed, 103 insertions(+), 88 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f05de398ee..b11b0b728d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,20 +29,20 @@ inherits = "release" strip = false [dependencies] -anyhow = "1.0.94" +anyhow = { workspace = true } api_client = { path = "api_client" } -clap = { version = "4.5.13", features = ["string"] } -dhat = { version = "0.3.3", optional = true } +clap = { workspace = true, features = ["string"] } +dhat = { workspace = true, optional = true } env_logger = { workspace = true } -epoll = "4.3.3" +epoll = { workspace = true } event_monitor = { path = "event_monitor" } hypervisor = { path = "hypervisor" } -libc = "0.2.167" -log = { version = "0.4.22", features = ["std"] } +libc = { workspace = true } +log = { workspace = true, features = ["std"] } option_parser = { path = "option_parser" } seccompiler = { workspace = true } serde_json = { workspace = true } -signal-hook = "0.3.18" +signal-hook = { workspace = true } thiserror = { workspace = true } tpm = { path = "tpm" } tracer = { path = "tracer" } @@ -52,11 +52,11 @@ vmm-sys-util = { workspace = true } zbus = { version = "5.7.1", optional = true } [dev-dependencies] -dirs = "6.0.0" +dirs = { workspace = true } net_util = { path = "net_util" } serde_json = { workspace = true } test_infra = { path = "test_infra" } -wait-timeout = "0.2.0" +wait-timeout = { workspace = true } # Please adjust `vmm::feature_list()` accordingly when changing the # feature list below @@ -127,10 +127,25 @@ igvm = { git = "https://github.com/microsoft/igvm", branch = "main" } igvm_defs = { git = "https://github.com/microsoft/igvm", branch = "main" } # serde crates +serde = "1.0.208" serde_json = "1.0.120" +serde_with = { version = "3.14.0", default-features = false } # other crates +anyhow = "1.0.94" +bitflags = "2.9.0" +byteorder = "1.5.0" +cfg-if = "1.0.0" +clap = "4.5.13" +dhat = "0.3.3" +dirs = "6.0.0" env_logger = "0.11.8" +epoll = "4.3.3" +flume = "0.11.1" +libc = "0.2.167" +log = "0.4.22" +signal-hook = "0.3.18" thiserror = "2.0.12" uuid = { version = "1.17.0" } +wait-timeout = "0.2.0" zerocopy = { version = "0.8.26", default-features = false } diff --git a/arch/Cargo.toml b/arch/Cargo.toml index 03d2ad4af5..3e2202f025 100644 --- a/arch/Cargo.toml +++ b/arch/Cargo.toml @@ -11,13 +11,13 @@ sev_snp = [] tdx = [] [dependencies] -anyhow = "1.0.94" -byteorder = "1.5.0" +anyhow = { workspace = true } +byteorder = { workspace = true } hypervisor = { path = "../hypervisor" } -libc = "0.2.167" +libc = { workspace = true } linux-loader = { workspace = true, features = ["bzimage", "elf", "pe"] } -log = "0.4.22" -serde = { version = "1.0.208", features = ["derive", "rc"] } +log = { workspace = true } +serde = { workspace = 
true, features = ["derive", "rc"] } thiserror = { workspace = true } uuid = { workspace = true } vm-memory = { workspace = true, features = ["backend-bitmap", "backend-mmap"] } diff --git a/block/Cargo.toml b/block/Cargo.toml index 48551251a5..f1e102e922 100644 --- a/block/Cargo.toml +++ b/block/Cargo.toml @@ -9,13 +9,13 @@ default = [] io_uring = ["dep:io-uring"] [dependencies] -byteorder = "1.5.0" +byteorder = { workspace = true } crc-any = "2.5.0" io-uring = { version = "0.6.4", optional = true } -libc = "0.2.167" -log = "0.4.22" +libc = { workspace = true } +log = { workspace = true } remain = "0.2.15" -serde = { version = "1.0.208", features = ["derive"] } +serde = { workspace = true, features = ["derive"] } smallvec = "1.13.2" thiserror = { workspace = true } uuid = { workspace = true, features = ["v4"] } diff --git a/devices/Cargo.toml b/devices/Cargo.toml index 334ec0e310..0c32e468e7 100644 --- a/devices/Cargo.toml +++ b/devices/Cargo.toml @@ -6,17 +6,17 @@ version = "0.1.0" [dependencies] acpi_tables = { workspace = true } -anyhow = "1.0.94" +anyhow = { workspace = true } arch = { path = "../arch" } -bitflags = "2.9.0" -byteorder = "1.5.0" +bitflags = { workspace = true } +byteorder = { workspace = true } event_monitor = { path = "../event_monitor" } hypervisor = { path = "../hypervisor" } -libc = "0.2.167" -log = "0.4.22" +libc = { workspace = true } +log = { workspace = true } num_enum = "0.7.2" pci = { path = "../pci" } -serde = { version = "1.0.208", features = ["derive"] } +serde = { workspace = true, features = ["derive"] } thiserror = { workspace = true } tpm = { path = "../tpm" } vm-allocator = { path = "../vm-allocator" } diff --git a/event_monitor/Cargo.toml b/event_monitor/Cargo.toml index 764cc6218e..af63335046 100644 --- a/event_monitor/Cargo.toml +++ b/event_monitor/Cargo.toml @@ -5,7 +5,7 @@ name = "event_monitor" version = "0.1.0" [dependencies] -flume = "0.11.1" -libc = "0.2.167" -serde = { version = "1.0.208", features = ["derive", "rc"] } +flume = { workspace = true } +libc = { workspace = true } +serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true } diff --git a/hypervisor/Cargo.toml b/hypervisor/Cargo.toml index 9a254dd940..bdfefdeefd 100644 --- a/hypervisor/Cargo.toml +++ b/hypervisor/Cargo.toml @@ -13,27 +13,27 @@ sev_snp = ["igvm", "igvm_defs"] tdx = [] [dependencies] -anyhow = "1.0.94" +anyhow = { workspace = true } arc-swap = "1.7.1" bitfield-struct = "0.10.1" -byteorder = "1.5.0" -cfg-if = "1.0.0" +byteorder = { workspace = true } +cfg-if = { workspace = true } concat-idents = "1.1.5" igvm = { workspace = true, optional = true } igvm_defs = { workspace = true, optional = true } kvm-bindings = { workspace = true, optional = true, features = ["serde"] } kvm-ioctls = { workspace = true, optional = true } -libc = "0.2.167" -log = "0.4.22" +libc = { workspace = true } +log = { workspace = true } mshv-bindings = { workspace = true, features = [ "fam-wrappers", "with-serde", ], optional = true } mshv-ioctls = { workspace = true, optional = true } open-enum = "0.5.2" -serde = { version = "1.0.208", features = ["derive", "rc"] } +serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true } -serde_with = { version = "3.14.0", default-features = false, features = [ +serde_with = { workspace = true, default-features = false, features = [ "macros", ] } thiserror = { workspace = true } diff --git a/net_util/Cargo.toml b/net_util/Cargo.toml index 981b5b3d9e..74ad342085 100644 --- a/net_util/Cargo.toml 
+++ b/net_util/Cargo.toml @@ -5,13 +5,13 @@ name = "net_util" version = "0.1.0" [dependencies] -epoll = "4.3.3" +epoll = { workspace = true } getrandom = "0.3.3" -libc = "0.2.167" -log = "0.4.22" +libc = { workspace = true } +log = { workspace = true } net_gen = { path = "../net_gen" } rate_limiter = { path = "../rate_limiter" } -serde = { version = "1.0.208", features = ["derive"] } +serde = { workspace = true, features = ["derive"] } thiserror = { workspace = true } virtio-bindings = { workspace = true } virtio-queue = { workspace = true } diff --git a/pci/Cargo.toml b/pci/Cargo.toml index 0e29051815..49618b194b 100644 --- a/pci/Cargo.toml +++ b/pci/Cargo.toml @@ -10,12 +10,12 @@ kvm = ["hypervisor/kvm", "vfio-ioctls/kvm"] mshv = ["hypervisor/mshv", "vfio-ioctls/mshv"] [dependencies] -anyhow = "1.0.94" -byteorder = "1.5.0" +anyhow = { workspace = true } +byteorder = { workspace = true } hypervisor = { path = "../hypervisor" } -libc = "0.2.167" -log = "0.4.22" -serde = { version = "1.0.208", features = ["derive"] } +libc = { workspace = true } +log = { workspace = true } +serde = { workspace = true, features = ["derive"] } thiserror = { workspace = true } vfio-bindings = { workspace = true, features = ["fam-wrappers"] } vfio-ioctls = { workspace = true, default-features = false } diff --git a/performance-metrics/Cargo.toml b/performance-metrics/Cargo.toml index 8572bf5160..2403913767 100644 --- a/performance-metrics/Cargo.toml +++ b/performance-metrics/Cargo.toml @@ -6,9 +6,9 @@ name = "performance-metrics" version = "0.1.0" [dependencies] -clap = { version = "4.5.13", features = ["wrap_help"] } -dirs = "6.0.0" -serde = { version = "1.0.208", features = ["derive", "rc"] } +clap = { workspace = true, features = ["wrap_help"] } +dirs = { workspace = true } +serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true } test_infra = { path = "../test_infra" } thiserror = { workspace = true } diff --git a/rate_limiter/Cargo.toml b/rate_limiter/Cargo.toml index a286b9e20a..6c6cdd650e 100644 --- a/rate_limiter/Cargo.toml +++ b/rate_limiter/Cargo.toml @@ -4,8 +4,8 @@ name = "rate_limiter" version = "0.1.0" [dependencies] -epoll = "4.3.3" -libc = "0.2.167" -log = "0.4.22" +epoll = { workspace = true } +libc = { workspace = true } +log = { workspace = true } thiserror = { workspace = true } vmm-sys-util = { workspace = true } diff --git a/test_infra/Cargo.toml b/test_infra/Cargo.toml index 5aeae23c58..37c6f38454 100644 --- a/test_infra/Cargo.toml +++ b/test_infra/Cargo.toml @@ -5,11 +5,11 @@ name = "test_infra" version = "0.1.0" [dependencies] -dirs = "6.0.0" -epoll = "4.3.3" -libc = "0.2.167" +dirs = { workspace = true } +epoll = { workspace = true } +libc = { workspace = true } serde_json = { workspace = true } ssh2 = { version = "0.9.4", features = ["vendored-openssl"] } thiserror = { workspace = true } vmm-sys-util = { workspace = true } -wait-timeout = "0.2.0" +wait-timeout = { workspace = true } diff --git a/tpm/Cargo.toml b/tpm/Cargo.toml index fa3461245d..cf03968cde 100644 --- a/tpm/Cargo.toml +++ b/tpm/Cargo.toml @@ -6,9 +6,9 @@ name = "tpm" version = "0.1.0" [dependencies] -anyhow = "1.0.94" -libc = "0.2.167" -log = "0.4.22" +anyhow = { workspace = true } +libc = { workspace = true } +log = { workspace = true } net_gen = { path = "../net_gen" } thiserror = { workspace = true } vmm-sys-util = { workspace = true } diff --git a/tracer/Cargo.toml b/tracer/Cargo.toml index 5d4cb678c9..368520870f 100644 --- a/tracer/Cargo.toml +++ b/tracer/Cargo.toml @@ -5,9 +5,9 
@@ name = "tracer" version = "0.1.0" [dependencies] -libc = "0.2.167" -log = "0.4.22" -serde = { version = "1.0.208", features = ["derive", "rc"] } +libc = { workspace = true } +log = { workspace = true } +serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true } [features] diff --git a/vhost_user_block/Cargo.toml b/vhost_user_block/Cargo.toml index 0648d13be5..ea1acf12a3 100644 --- a/vhost_user_block/Cargo.toml +++ b/vhost_user_block/Cargo.toml @@ -7,10 +7,10 @@ version = "0.1.0" [dependencies] block = { path = "../block" } -clap = { version = "4.5.13", features = ["cargo", "wrap_help"] } +clap = { workspace = true, features = ["cargo", "wrap_help"] } env_logger = { workspace = true } -libc = "0.2.167" -log = "0.4.22" +libc = { workspace = true } +log = { workspace = true } option_parser = { path = "../option_parser" } thiserror = { workspace = true } vhost = { workspace = true, features = ["vhost-user-backend"] } diff --git a/vhost_user_net/Cargo.toml b/vhost_user_net/Cargo.toml index 515287e2cc..f84eae8d18 100644 --- a/vhost_user_net/Cargo.toml +++ b/vhost_user_net/Cargo.toml @@ -6,11 +6,11 @@ name = "vhost_user_net" version = "0.1.0" [dependencies] -clap = { version = "4.5.13", features = ["cargo", "wrap_help"] } +clap = { workspace = true, features = ["cargo", "wrap_help"] } env_logger = { workspace = true } -epoll = "4.3.3" -libc = "0.2.167" -log = "0.4.22" +epoll = { workspace = true } +libc = { workspace = true } +log = { workspace = true } net_util = { path = "../net_util" } option_parser = { path = "../option_parser" } thiserror = { workspace = true } diff --git a/virtio-devices/Cargo.toml b/virtio-devices/Cargo.toml index 9bf2efeca5..e13e98f759 100644 --- a/virtio-devices/Cargo.toml +++ b/virtio-devices/Cargo.toml @@ -11,20 +11,20 @@ mshv = ["pci/mshv"] sev_snp = ["mshv-ioctls"] [dependencies] -anyhow = "1.0.94" +anyhow = { workspace = true } block = { path = "../block" } -byteorder = "1.5.0" -epoll = "4.3.3" +byteorder = { workspace = true } +epoll = { workspace = true } event_monitor = { path = "../event_monitor" } -libc = "0.2.167" -log = "0.4.22" +libc = { workspace = true } +log = { workspace = true } mshv-ioctls = { workspace = true, optional = true } net_util = { path = "../net_util" } pci = { path = "../pci" } rate_limiter = { path = "../rate_limiter" } seccompiler = { workspace = true } -serde = { version = "1.0.208", features = ["derive"] } -serde_with = { version = "3.14.0", default-features = false, features = [ +serde = { workspace = true, features = ["derive"] } +serde_with = { workspace = true, default-features = false, features = [ "macros", ] } serial_buffer = { path = "../serial_buffer" } diff --git a/vm-allocator/Cargo.toml b/vm-allocator/Cargo.toml index 1cc0ae9e1f..4f546058e1 100644 --- a/vm-allocator/Cargo.toml +++ b/vm-allocator/Cargo.toml @@ -9,7 +9,7 @@ default = [] kvm = ["arch/kvm"] [dependencies] -libc = "0.2.167" +libc = { workspace = true } vm-memory = { workspace = true } [target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] diff --git a/vm-device/Cargo.toml b/vm-device/Cargo.toml index 8262f84b00..9df6af3a0a 100644 --- a/vm-device/Cargo.toml +++ b/vm-device/Cargo.toml @@ -11,7 +11,7 @@ mshv = ["vfio-ioctls/mshv"] [dependencies] hypervisor = { path = "../hypervisor" } -serde = { version = "1.0.208", features = ["derive", "rc"] } +serde = { workspace = true, features = ["derive", "rc"] } thiserror = { workspace = true } vfio-ioctls = { workspace = true, default-features = false } 
vmm-sys-util = { workspace = true } diff --git a/vm-migration/Cargo.toml b/vm-migration/Cargo.toml index 5a992c6070..de10ebdb33 100644 --- a/vm-migration/Cargo.toml +++ b/vm-migration/Cargo.toml @@ -5,8 +5,8 @@ name = "vm-migration" version = "0.1.0" [dependencies] -anyhow = "1.0.94" -serde = { version = "1.0.208", features = ["derive", "rc"] } +anyhow = { workspace = true } +serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true } thiserror = { workspace = true } vm-memory = { workspace = true, features = ["backend-atomic", "backend-mmap"] } diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index b28946f320..2ca7cd2dc4 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -33,18 +33,18 @@ tracing = ["tracer/tracing"] [dependencies] acpi_tables = { workspace = true } -anyhow = "1.0.94" +anyhow = { workspace = true } arch = { path = "../arch" } -bitflags = "2.9.0" +bitflags = { workspace = true } block = { path = "../block" } blocking = { version = "1.6.1", optional = true } -cfg-if = "1.0.0" -clap = "4.5.13" +cfg-if = { workspace = true } +clap = { workspace = true } devices = { path = "../devices" } -dhat = { version = "0.3.3", optional = true } -epoll = "4.3.3" +dhat = { workspace = true, optional = true } +epoll = { workspace = true } event_monitor = { path = "../event_monitor" } -flume = "0.11.1" +flume = { workspace = true } futures = { version = "0.3.31", optional = true } gdbstub = { version = "0.7.6", optional = true } gdbstub_arch = { version = "0.3.0", optional = true } @@ -53,9 +53,9 @@ hypervisor = { path = "../hypervisor" } igvm = { workspace = true, optional = true } igvm_defs = { workspace = true, optional = true } landlock = "0.4.0" -libc = "0.2.167" +libc = { workspace = true } linux-loader = { workspace = true, features = ["bzimage", "elf", "pe"] } -log = "0.4.22" +log = { workspace = true } micro_http = { git = "https://github.com/firecracker-microvm/micro-http", branch = "main" } mshv-bindings = { workspace = true, features = [ "fam-wrappers", @@ -67,10 +67,10 @@ pci = { path = "../pci" } range_map_vec = { version = "0.2.0", optional = true } rate_limiter = { path = "../rate_limiter" } seccompiler = { workspace = true } -serde = { version = "1.0.208", features = ["derive", "rc"] } +serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true } serial_buffer = { path = "../serial_buffer" } -signal-hook = "0.3.18" +signal-hook = { workspace = true } thiserror = { workspace = true } tracer = { path = "../tracer" } uuid = { workspace = true } From ba962a30d5cac4fe4f7a4e99dd98d9b5193414f3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Jul 2025 03:05:45 +0000 Subject: [PATCH 046/294] build: Bump r-efi from 5.2.0 to 5.3.0 Bumps [r-efi](https://github.com/r-efi/r-efi) from 5.2.0 to 5.3.0. - [Release notes](https://github.com/r-efi/r-efi/releases) - [Changelog](https://github.com/r-efi/r-efi/blob/main/NEWS.md) - [Commits](https://github.com/r-efi/r-efi/compare/v5.2.0...v5.3.0) --- updated-dependencies: - dependency-name: r-efi dependency-version: 5.3.0 dependency-type: indirect update-type: version-update:semver-minor ... 
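On the `{ workspace = true }` conversions in the manifests above: a minimal sketch of the Cargo workspace-dependency pattern, assuming the root `Cargo.toml` carries matching `[workspace.dependencies]` entries (the root-manifest side of the change is not shown here, so the entries below are illustrative, using the versions removed from the member crates).

    # Root Cargo.toml -- illustrative entries, not copied from the repository
    [workspace.dependencies]
    libc = "0.2.167"
    log = "0.4.22"
    serde = "1.0.208"

    # Member crate Cargo.toml
    [dependencies]
    libc = { workspace = true }
    log = { workspace = true }
    # Features are additive on top of the workspace definition:
    serde = { workspace = true, features = ["derive"] }

With this layout a version bump is made once in the root manifest and picked up by every member crate, which is what makes the per-crate diffs above so mechanical.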
Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7e07c7551b..4f2af0eb4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1696,9 +1696,9 @@ dependencies = [ [[package]] name = "r-efi" -version = "5.2.0" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "rand" From 7c631b2d1b16a193c5c7ba0a3e8165d949275f39 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Jul 2025 04:46:43 +0000 Subject: [PATCH 047/294] build: Bump toml_datetime from 0.6.8 to 0.6.11 Bumps [toml_datetime](https://github.com/toml-rs/toml) from 0.6.8 to 0.6.11. - [Commits](https://github.com/toml-rs/toml/compare/toml_datetime-v0.6.8...toml_datetime-v0.6.11) --- updated-dependencies: - dependency-name: toml_datetime dependency-version: 0.6.11 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4f2af0eb4a..ffc1ef8e0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2118,9 +2118,9 @@ checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820" [[package]] name = "toml_datetime" -version = "0.6.8" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" [[package]] name = "toml_edit" From 8e010f1aa3e3d09fc40e721aa9e90de36898befa Mon Sep 17 00:00:00 2001 From: Muminul Islam Date: Mon, 28 Jul 2025 12:09:28 -0700 Subject: [PATCH 048/294] vmm: don't configure system if rsdp is not available In case of CVM guest rsdp is set to none. Unwrapping it make the vmm crashed. Don't call configure system if the rsdb address is none. Signed-off-by: Muminul Islam --- vmm/src/vm.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index d7bba25cc0..012ebef05e 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -2332,15 +2332,16 @@ impl Vm { let rsdp_addr = self.create_acpi_tables(); #[cfg(not(target_arch = "riscv64"))] - // Configure shared state based on loaded kernel - entry_point - .map(|entry_point| { - // Safe to unwrap rsdp_addr as we know it can't be None when - // the entry_point is Some. - self.configure_system(rsdp_addr.unwrap(), entry_point) - }) - .transpose()?; - + { + #[cfg(not(feature = "sev_snp"))] + assert!(rsdp_addr.is_some()); + // Configure shared state based on loaded kernel + if let Some(rsdp_adr) = rsdp_addr { + entry_point + .map(|entry_point| self.configure_system(rsdp_adr, entry_point)) + .transpose()?; + } + } #[cfg(target_arch = "riscv64")] self.configure_system().unwrap(); From 003e89e8cd5d8cafe241212fa3d6f100d4ff97ea Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Jul 2025 23:37:12 +0000 Subject: [PATCH 049/294] build: Bump anstream from 0.6.15 to 0.6.19 Bumps [anstream](https://github.com/rust-cli/anstyle) from 0.6.15 to 0.6.19. 
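A note on the `Option`/`transpose` pattern used in the rsdp fix above; the following is a self-contained sketch in which the names and addresses are placeholders, not taken from the tree:

    // Illustrative only: mirrors the shape of the vm.rs change above.
    fn configure(rsdp: u64, entry: u64) -> Result<(), String> {
        if entry == 0 {
            return Err("bad entry point".into());
        }
        let _ = rsdp;
        Ok(())
    }

    fn main() -> Result<(), String> {
        let rsdp_addr: Option<u64> = None; // None models the CVM-guest case described above
        let entry_point: Option<u64> = Some(0x100_0000);

        // Only configure when an RSDP address exists. `transpose` turns
        // Option<Result<(), E>> into Result<Option<()>, E>, so `?` applies.
        if let Some(rsdp) = rsdp_addr {
            entry_point
                .map(|entry| configure(rsdp, entry))
                .transpose()?;
        }
        Ok(())
    }

The point of the guard is that a missing RSDP no longer reaches an `unwrap()`, while a present RSDP still propagates any configuration error through `?`.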
- [Commits](https://github.com/rust-cli/anstyle/compare/anstream-v0.6.15...anstream-v0.6.19) --- updated-dependencies: - dependency-name: anstream dependency-version: 0.6.19 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ffc1ef8e0b..db394f861e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -36,9 +36,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.15" +version = "0.6.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" dependencies = [ "anstyle", "anstyle-parse", @@ -655,7 +655,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -1837,7 +1837,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -1850,7 +1850,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.9.4", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] From 8a37e154db90ca0db200cc28752f0a19581e7500 Mon Sep 17 00:00:00 2001 From: Shubham Chakrawar Date: Tue, 29 Jul 2025 16:51:32 -0700 Subject: [PATCH 050/294] ci: Prevent link checker rate limits Optimize lychee workflow to check only changed files in pull requests, avoiding excessive API calls to prevent rate limits. Fixes #7056 Signed-off-by: Shubham Chakrawar --- .github/workflows/lychee.yaml | 39 ++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/.github/workflows/lychee.yaml b/.github/workflows/lychee.yaml index dd3a372dc8..620648bf9b 100644 --- a/.github/workflows/lychee.yaml +++ b/.github/workflows/lychee.yaml @@ -1,7 +1,5 @@ name: Link Check (lychee) -on: - pull_request - +on: pull_request jobs: link_check: name: Link Check @@ -9,8 +7,39 @@ jobs: steps: - name: Code checkout uses: actions/checkout@v4 + with: + # Fetch the entire history so git diff can compare against the base branch + fetch-depth: 0 + - name: Get changed files in PR + id: changed-files + uses: tj-actions/changed-files@v40 # Using a dedicated action for robustness + with: + # Compare the HEAD of the PR with the merge-base (where the PR branches off) + base_sha: ${{ github.event.pull_request.base.sha }} - - name: Link Availability Check + # NEW STEP: Print all changed-files outputs for verification + - name: Verify Changed Files + run: | + echo "--- tj-actions/changed-files Outputs ---" + echo "any_changed: ${{ steps.changed-files.outputs.any_changed }}" + echo "all_changed_files: ${{ steps.changed-files.outputs.all_changed_files }}" + echo "added_files: ${{ steps.changed-files.outputs.added_files }}" + echo "modified_files: ${{ steps.changed-files.outputs.modified_files }}" + echo "deleted_files: ${{ steps.changed-files.outputs.deleted_files }}" + echo "renamed_files: ${{ steps.changed-files.outputs.renamed_files }}" + echo "----------------------------------------" + # This will also show if the all_changed_files string is empty or not + if [ -n "${{ steps.changed-files.outputs.all_changed_files }}" ]; then + echo "Detected changes: all_changed_files output is NOT empty." 
+ else + echo "No changes detected: all_changed_files output IS empty." + fi + - name: Link Availability Check (Diff Only) + # MODIFIED: Only run lychee if the 'all_changed_files' output is not an empty string + if: ${{ steps.changed-files.outputs.all_changed_files != '' }} uses: lycheeverse/lychee-action@master with: - args: --verbose --config .lychee.toml . + # Pass the space-separated list of changed files to lychee + args: --verbose --config .lychee.toml ${{ steps.changed-files.outputs.all_changed_files }} + failIfEmpty: false + fail: true \ No newline at end of file From 16b0e0848248d044b13dc62c65335f4ae1fa9e7a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 30 Jul 2025 23:18:02 +0000 Subject: [PATCH 051/294] build: Bump tj-actions/changed-files from 40 to 46 in /.github/workflows Bumps [tj-actions/changed-files](https://github.com/tj-actions/changed-files) from 40 to 46. - [Release notes](https://github.com/tj-actions/changed-files/releases) - [Changelog](https://github.com/tj-actions/changed-files/blob/main/HISTORY.md) - [Commits](https://github.com/tj-actions/changed-files/compare/v40...v46) --- updated-dependencies: - dependency-name: tj-actions/changed-files dependency-version: '46' dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- .github/workflows/lychee.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lychee.yaml b/.github/workflows/lychee.yaml index 620648bf9b..68271c509b 100644 --- a/.github/workflows/lychee.yaml +++ b/.github/workflows/lychee.yaml @@ -12,7 +12,7 @@ jobs: fetch-depth: 0 - name: Get changed files in PR id: changed-files - uses: tj-actions/changed-files@v40 # Using a dedicated action for robustness + uses: tj-actions/changed-files@v46 # Using a dedicated action for robustness with: # Compare the HEAD of the PR with the merge-base (where the PR branches off) base_sha: ${{ github.event.pull_request.base.sha }} From 254db85a33052c13f2e00cc4728c92f1ebb0ee28 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 30 Jul 2025 23:50:15 +0000 Subject: [PATCH 052/294] build: Bump anyhow from 1.0.94 to 1.0.98 Bumps [anyhow](https://github.com/dtolnay/anyhow) from 1.0.94 to 1.0.98. - [Release notes](https://github.com/dtolnay/anyhow/releases) - [Commits](https://github.com/dtolnay/anyhow/compare/1.0.94...1.0.98) --- updated-dependencies: - dependency-name: anyhow dependency-version: 1.0.98 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 10 +++++----- Cargo.toml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index db394f861e..b549891b12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -85,9 +85,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.94" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "api_client" @@ -655,7 +655,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1837,7 +1837,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1850,7 +1850,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.9.4", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index b11b0b728d..4e1848010f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -132,7 +132,7 @@ serde_json = "1.0.120" serde_with = { version = "3.14.0", default-features = false } # other crates -anyhow = "1.0.94" +anyhow = "1.0.98" bitflags = "2.9.0" byteorder = "1.5.0" cfg-if = "1.0.0" From a7645ae9d6ea30acf2fe017385a6cff5df36a658 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 31 Jul 2025 23:18:29 +0000 Subject: [PATCH 053/294] build: Bump async-process from 2.3.0 to 2.4.0 Bumps [async-process](https://github.com/smol-rs/async-process) from 2.3.0 to 2.4.0. - [Release notes](https://github.com/smol-rs/async-process/releases) - [Changelog](https://github.com/smol-rs/async-process/blob/master/CHANGELOG.md) - [Commits](https://github.com/smol-rs/async-process/compare/v2.3.0...v2.4.0) --- updated-dependencies: - dependency-name: async-process dependency-version: 2.4.0 dependency-type: indirect update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b549891b12..01a9ef0eda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -191,9 +191,9 @@ dependencies = [ [[package]] name = "async-process" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63255f1dc2381611000436537bbedfe83183faa303a5a0edaf191edef06526bb" +checksum = "65daa13722ad51e6ab1a1b9c01299142bc75135b337923cfa10e79bbbd669f00" dependencies = [ "async-channel", "async-io", @@ -204,8 +204,7 @@ dependencies = [ "cfg-if", "event-listener", "futures-lite", - "rustix 0.38.44", - "tracing", + "rustix 1.0.7", ] [[package]] From b630b22ed91f694ff5f4c4cf5fa365fee32f55bf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:31:21 +0000 Subject: [PATCH 054/294] build: Bump landlock from 0.4.0 to 0.4.2 Bumps [landlock](https://github.com/landlock-lsm/rust-landlock) from 0.4.0 to 0.4.2. 
- [Release notes](https://github.com/landlock-lsm/rust-landlock/releases) - [Changelog](https://github.com/landlock-lsm/rust-landlock/blob/main/CHANGELOG.md) - [Commits](https://github.com/landlock-lsm/rust-landlock/compare/v0.4.0...v0.4.2) --- updated-dependencies: - dependency-name: landlock dependency-version: 0.4.2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 6 +++--- vmm/Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 01a9ef0eda..46fe34a97a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1082,13 +1082,13 @@ dependencies = [ [[package]] name = "landlock" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dafb8a4afee64f167eb2b52d32f0eea002e41a7a6450e68c799c8ec3a81a634c" +checksum = "b3d2ef408b88e913bfc6594f5e693d57676f6463ded7d8bf994175364320c706" dependencies = [ "enumflags2", "libc", - "thiserror 1.0.62", + "thiserror 2.0.12", ] [[package]] diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 2ca7cd2dc4..bceba9bdd2 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -52,7 +52,7 @@ hex = { version = "0.4.3", optional = true } hypervisor = { path = "../hypervisor" } igvm = { workspace = true, optional = true } igvm_defs = { workspace = true, optional = true } -landlock = "0.4.0" +landlock = "0.4.2" libc = { workspace = true } linux-loader = { workspace = true, features = ["bzimage", "elf", "pe"] } log = { workspace = true } From eb147cb3cccf343ae0fb8c15a5d2b929a6426ef5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 5 Aug 2025 06:17:08 +0000 Subject: [PATCH 055/294] build: Bump rustc-demangle from 0.1.24 to 0.1.26 Bumps [rustc-demangle](https://github.com/rust-lang/rustc-demangle) from 0.1.24 to 0.1.26. - [Release notes](https://github.com/rust-lang/rustc-demangle/releases) - [Changelog](https://github.com/rust-lang/rustc-demangle/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/rustc-demangle/compare/0.1.24...rustc-demangle-v0.1.26) --- updated-dependencies: - dependency-name: rustc-demangle dependency-version: 0.1.26 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 46fe34a97a..d464823498 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1816,9 +1816,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.24" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" [[package]] name = "rustc-hash" From 9ee3b29a1173cafff2e80a6ede1adbd6a4842410 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 5 Aug 2025 07:28:23 +0000 Subject: [PATCH 056/294] build: Bump crate-ci/typos from 1.34.0 to 1.35.1 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.34.0 to 1.35.1. 
- [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.34.0...v1.35.1) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-version: 1.35.1 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/quality.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 0ece7fa690..81ad38b626 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -152,4 +152,4 @@ jobs: steps: - uses: actions/checkout@v4 # Executes "typos ." - - uses: crate-ci/typos@v1.34.0 + - uses: crate-ci/typos@v1.35.1 From e2d2b2f2f05d9a6b1da32abcd05a58d9cd5c3975 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 5 Aug 2025 11:26:54 +0000 Subject: [PATCH 057/294] build: Bump rand from 0.9.1 to 0.9.2 Bumps [rand](https://github.com/rust-random/rand) from 0.9.1 to 0.9.2. - [Release notes](https://github.com/rust-random/rand/releases) - [Changelog](https://github.com/rust-random/rand/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-random/rand/compare/rand_core-0.9.1...rand_core-0.9.2) --- updated-dependencies: - dependency-name: rand dependency-version: 0.9.2 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d464823498..1a0499d850 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1701,9 +1701,9 @@ checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "rand" -version = "0.9.1" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", "rand_core", From cda1ea53a56b30f77d5982c42ea170afd3e56f06 Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Tue, 22 Jul 2025 01:20:31 +0000 Subject: [PATCH 058/294] vmm: acpi: Make 'create_acpi_tables' more flexible Now 'create_acpi_tables_internal()' can generate ACPI tables with different dsdt offset, so that it can be reused for generating ACPI tables for `fw_cfg`. 
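The refactor described above reduces to the following bookkeeping pattern, shown here as a standalone sketch; table payloads and the base address are placeholders, not real ACPI encodings:

    // Accumulate table bytes once, record each table's guest address for the
    // XSDT, and leave the actual guest-memory write to the caller.
    fn build_tables(dsdt_addr: u64) -> (Vec<u8>, Vec<u64>) {
        let tables: Vec<Vec<u8>> = vec![
            vec![0u8; 64],  // stand-in for DSDT
            vec![0u8; 116], // stand-in for FACP
            vec![0u8; 44],  // stand-in for MADT
        ];

        let mut bytes: Vec<u8> = Vec::new();
        let mut xsdt_pointers: Vec<u64> = Vec::new();
        let mut addr = dsdt_addr;
        for (i, table) in tables.iter().enumerate() {
            // The DSDT is referenced from the FACP rather than the XSDT, so
            // only the tables after it become XSDT pointers, as in the patch.
            if i > 0 {
                xsdt_pointers.push(addr);
            }
            bytes.extend_from_slice(table);
            addr = addr.checked_add(table.len() as u64).unwrap();
        }
        (bytes, xsdt_pointers)
    }

    fn main() {
        // Arbitrary DSDT base for the sketch; the real code places it right
        // after the RSDP at arch::layout::RSDP_POINTER.
        let (bytes, pointers) = build_tables(0xE_0000);
        println!("{} table bytes, XSDT pointers at {:#x?}", bytes.len(), pointers);
    }

Because the bytes and pointers are returned instead of written directly, the same builder can target a different base address, which is what allows reuse for `fw_cfg`.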
Signed-off-by: Bo Chen Signed-off-by: Alex Orozco --- vmm/src/acpi.rs | 107 ++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 53 deletions(-) diff --git a/vmm/src/acpi.rs b/vmm/src/acpi.rs index e59b63f58b..27d52abeb4 100644 --- a/vmm/src/acpi.rs +++ b/vmm/src/acpi.rs @@ -626,41 +626,32 @@ fn create_viot_table(iommu_bdf: &PciBdf, devices_bdf: &[PciBdf]) -> Sdt { viot } -pub fn create_acpi_tables( - guest_mem: &GuestMemoryMmap, +fn create_acpi_tables_internal( + dsdt_offset: GuestAddress, device_manager: &Arc>, cpu_manager: &Arc>, memory_manager: &Arc>, numa_nodes: &NumaNodes, tpm_enabled: bool, -) -> GuestAddress { - trace_scoped!("create_acpi_tables"); - - let start_time = Instant::now(); - let rsdp_offset = arch::layout::RSDP_POINTER; +) -> (Rsdp, Vec, Vec) { + // Generated bytes for ACPI tables + let mut tables_bytes: Vec = Vec::new(); let mut tables: Vec = Vec::new(); // DSDT let dsdt = create_dsdt_table(device_manager, cpu_manager, memory_manager); - let dsdt_offset = rsdp_offset.checked_add(Rsdp::len() as u64).unwrap(); - guest_mem - .write_slice(dsdt.as_slice(), dsdt_offset) - .expect("Error writing DSDT table"); + tables_bytes.extend_from_slice(dsdt.as_slice()); // FACP aka FADT let facp = create_facp_table(dsdt_offset, device_manager); let facp_offset = dsdt_offset.checked_add(dsdt.len() as u64).unwrap(); - guest_mem - .write_slice(facp.as_slice(), facp_offset) - .expect("Error writing FACP table"); + tables_bytes.extend_from_slice(facp.as_slice()); tables.push(facp_offset.0); // MADT let madt = cpu_manager.lock().unwrap().create_madt(); let madt_offset = facp_offset.checked_add(facp.len() as u64).unwrap(); - guest_mem - .write_slice(madt.as_slice(), madt_offset) - .expect("Error writing MADT table"); + tables_bytes.extend_from_slice(madt.as_slice()); tables.push(madt_offset.0); let mut prev_tbl_len = madt.len() as u64; let mut prev_tbl_off = madt_offset; @@ -670,9 +661,7 @@ pub fn create_acpi_tables( { let pptt = cpu_manager.lock().unwrap().create_pptt(); let pptt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); - guest_mem - .write_slice(pptt.as_slice(), pptt_offset) - .expect("Error writing PPTT table"); + tables_bytes.extend_from_slice(pptt.as_slice()); tables.push(pptt_offset.0); prev_tbl_len = pptt.len() as u64; prev_tbl_off = pptt_offset; @@ -683,9 +672,7 @@ pub fn create_acpi_tables( { let gtdt = create_gtdt_table(); let gtdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); - guest_mem - .write_slice(gtdt.as_slice(), gtdt_offset) - .expect("Error writing GTDT table"); + tables_bytes.extend_from_slice(gtdt.as_slice()); tables.push(gtdt_offset.0); prev_tbl_len = gtdt.len() as u64; prev_tbl_off = gtdt_offset; @@ -694,9 +681,7 @@ pub fn create_acpi_tables( // MCFG let mcfg = create_mcfg_table(device_manager.lock().unwrap().pci_segments()); let mcfg_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); - guest_mem - .write_slice(mcfg.as_slice(), mcfg_offset) - .expect("Error writing MCFG table"); + tables_bytes.extend_from_slice(mcfg.as_slice()); tables.push(mcfg_offset.0); prev_tbl_len = mcfg.len() as u64; prev_tbl_off = mcfg_offset; @@ -728,9 +713,7 @@ pub fn create_acpi_tables( // SPCR let spcr = create_spcr_table(serial_device_addr, serial_device_irq); let spcr_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); - guest_mem - .write_slice(spcr.as_slice(), spcr_offset) - .expect("Error writing SPCR table"); + tables_bytes.extend_from_slice(spcr.as_slice()); tables.push(spcr_offset.0); prev_tbl_len = spcr.len() as 
u64; prev_tbl_off = spcr_offset; @@ -738,9 +721,7 @@ pub fn create_acpi_tables( // DBG2 let dbg2 = create_dbg2_table(serial_device_addr); let dbg2_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); - guest_mem - .write_slice(dbg2.as_slice(), dbg2_offset) - .expect("Error writing DBG2 table"); + tables_bytes.extend_from_slice(dbg2.as_slice()); tables.push(dbg2_offset.0); prev_tbl_len = dbg2.len() as u64; prev_tbl_off = dbg2_offset; @@ -750,9 +731,7 @@ pub fn create_acpi_tables( // TPM2 Table let tpm2 = create_tpm2_table(); let tpm2_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); - guest_mem - .write_slice(tpm2.as_slice(), tpm2_offset) - .expect("Error writing TPM2 table"); + tables_bytes.extend_from_slice(tpm2.as_slice()); tables.push(tpm2_offset.0); prev_tbl_len = tpm2.len() as u64; @@ -770,17 +749,13 @@ pub fn create_acpi_tables( topology, ); let srat_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); - guest_mem - .write_slice(srat.as_slice(), srat_offset) - .expect("Error writing SRAT table"); + tables_bytes.extend_from_slice(srat.as_slice()); tables.push(srat_offset.0); // SLIT let slit = create_slit_table(numa_nodes); let slit_offset = srat_offset.checked_add(srat.len() as u64).unwrap(); - guest_mem - .write_slice(slit.as_slice(), slit_offset) - .expect("Error writing SLIT table"); + tables_bytes.extend_from_slice(slit.as_slice()); tables.push(slit_offset.0); prev_tbl_len = slit.len() as u64; @@ -791,9 +766,7 @@ pub fn create_acpi_tables( { let iort = create_iort_table(device_manager.lock().unwrap().pci_segments()); let iort_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); - guest_mem - .write_slice(iort.as_slice(), iort_offset) - .expect("Error writing IORT table"); + tables_bytes.extend_from_slice(iort.as_slice()); tables.push(iort_offset.0); prev_tbl_len = iort.len() as u64; prev_tbl_off = iort_offset; @@ -805,9 +778,7 @@ pub fn create_acpi_tables( let viot = create_viot_table(iommu_bdf, devices_bdf); let viot_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); - guest_mem - .write_slice(viot.as_slice(), viot_offset) - .expect("Error writing VIOT table"); + tables_bytes.extend_from_slice(viot.as_slice()); tables.push(viot_offset.0); prev_tbl_len = viot.len() as u64; prev_tbl_off = viot_offset; @@ -815,26 +786,56 @@ pub fn create_acpi_tables( // XSDT let mut xsdt = Sdt::new(*b"XSDT", 36, 1, *b"CLOUDH", *b"CHXSDT ", 1); - for table in tables { - xsdt.append(table); + for table in &tables { + xsdt.append(*table); } xsdt.update_checksum(); let xsdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); - guest_mem - .write_slice(xsdt.as_slice(), xsdt_offset) - .expect("Error writing XSDT table"); + tables_bytes.extend_from_slice(xsdt.as_slice()); // RSDP let rsdp = Rsdp::new(*b"CLOUDH", xsdt_offset.0); + + (rsdp, tables_bytes, tables) +} + +pub fn create_acpi_tables( + guest_mem: &GuestMemoryMmap, + device_manager: &Arc>, + cpu_manager: &Arc>, + memory_manager: &Arc>, + numa_nodes: &NumaNodes, + tpm_enabled: bool, +) -> GuestAddress { + trace_scoped!("create_acpi_tables"); + + let start_time = Instant::now(); + let rsdp_offset = arch::layout::RSDP_POINTER; + let dsdt_offset = rsdp_offset.checked_add(Rsdp::len() as u64).unwrap(); + + let (rsdp, tables_bytes, _tables_offset) = create_acpi_tables_internal( + dsdt_offset, + device_manager, + cpu_manager, + memory_manager, + numa_nodes, + tpm_enabled, + ); + guest_mem .write_slice(rsdp.as_bytes(), rsdp_offset) .expect("Error writing RSDP"); + guest_mem + .write_slice(tables_bytes.as_slice(), 
dsdt_offset) + .expect("Error writing ACPI tables"); + info!( "Generated ACPI tables: took {}µs size = {}", Instant::now().duration_since(start_time).as_micros(), - xsdt_offset.0 + xsdt.len() as u64 - rsdp_offset.0 + Rsdp::len() + tables_bytes.len(), ); + rsdp_offset } From abcec231eb5732dc2f9c398cb4175e501135af6b Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Mon, 28 Jul 2025 23:14:11 +0000 Subject: [PATCH 059/294] vmm: acpi: Clarify variable naming in 'create_acpi_tables_internal' When generating ACPI tables, we are not using "offset", instead we are using (guest physical) addresses. Signed-off-by: Bo Chen --- vmm/src/acpi.rs | 90 ++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/vmm/src/acpi.rs b/vmm/src/acpi.rs index 27d52abeb4..9d7106b8f7 100644 --- a/vmm/src/acpi.rs +++ b/vmm/src/acpi.rs @@ -627,7 +627,7 @@ fn create_viot_table(iommu_bdf: &PciBdf, devices_bdf: &[PciBdf]) -> Sdt { } fn create_acpi_tables_internal( - dsdt_offset: GuestAddress, + dsdt_addr: GuestAddress, device_manager: &Arc>, cpu_manager: &Arc>, memory_manager: &Arc>, @@ -643,48 +643,48 @@ fn create_acpi_tables_internal( tables_bytes.extend_from_slice(dsdt.as_slice()); // FACP aka FADT - let facp = create_facp_table(dsdt_offset, device_manager); - let facp_offset = dsdt_offset.checked_add(dsdt.len() as u64).unwrap(); + let facp = create_facp_table(dsdt_addr, device_manager); + let facp_addr = dsdt_addr.checked_add(dsdt.len() as u64).unwrap(); tables_bytes.extend_from_slice(facp.as_slice()); - tables.push(facp_offset.0); + tables.push(facp_addr.0); // MADT let madt = cpu_manager.lock().unwrap().create_madt(); - let madt_offset = facp_offset.checked_add(facp.len() as u64).unwrap(); + let madt_addr = facp_addr.checked_add(facp.len() as u64).unwrap(); tables_bytes.extend_from_slice(madt.as_slice()); - tables.push(madt_offset.0); + tables.push(madt_addr.0); let mut prev_tbl_len = madt.len() as u64; - let mut prev_tbl_off = madt_offset; + let mut prev_tbl_addr = madt_addr; // PPTT #[cfg(target_arch = "aarch64")] { let pptt = cpu_manager.lock().unwrap().create_pptt(); - let pptt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); + let pptt_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(pptt.as_slice()); - tables.push(pptt_offset.0); + tables.push(pptt_addr.0); prev_tbl_len = pptt.len() as u64; - prev_tbl_off = pptt_offset; + prev_tbl_addr = pptt_addr; } // GTDT #[cfg(target_arch = "aarch64")] { let gtdt = create_gtdt_table(); - let gtdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); + let gtdt_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(gtdt.as_slice()); - tables.push(gtdt_offset.0); + tables.push(gtdt_addr.0); prev_tbl_len = gtdt.len() as u64; - prev_tbl_off = gtdt_offset; + prev_tbl_addr = gtdt_addr; } // MCFG let mcfg = create_mcfg_table(device_manager.lock().unwrap().pci_segments()); - let mcfg_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); + let mcfg_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(mcfg.as_slice()); - tables.push(mcfg_offset.0); + tables.push(mcfg_addr.0); prev_tbl_len = mcfg.len() as u64; - prev_tbl_off = mcfg_offset; + prev_tbl_addr = mcfg_addr; // SPCR and DBG2 #[cfg(target_arch = "aarch64")] @@ -712,30 +712,30 @@ fn create_acpi_tables_internal( // SPCR let spcr = create_spcr_table(serial_device_addr, serial_device_irq); - let spcr_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); + 
let spcr_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(spcr.as_slice()); - tables.push(spcr_offset.0); + tables.push(spcr_addr.0); prev_tbl_len = spcr.len() as u64; - prev_tbl_off = spcr_offset; + prev_tbl_addr = spcr_addr; // DBG2 let dbg2 = create_dbg2_table(serial_device_addr); - let dbg2_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); + let dbg2_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(dbg2.as_slice()); - tables.push(dbg2_offset.0); + tables.push(dbg2_addr.0); prev_tbl_len = dbg2.len() as u64; - prev_tbl_off = dbg2_offset; + prev_tbl_addr = dbg2_addr; } if tpm_enabled { // TPM2 Table let tpm2 = create_tpm2_table(); - let tpm2_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); + let tpm2_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(tpm2.as_slice()); - tables.push(tpm2_offset.0); + tables.push(tpm2_addr.0); prev_tbl_len = tpm2.len() as u64; - prev_tbl_off = tpm2_offset; + prev_tbl_addr = tpm2_addr; } // SRAT and SLIT // Only created if the NUMA nodes list is not empty. @@ -748,28 +748,28 @@ fn create_acpi_tables_internal( #[cfg(target_arch = "x86_64")] topology, ); - let srat_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); + let srat_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(srat.as_slice()); - tables.push(srat_offset.0); + tables.push(srat_addr.0); // SLIT let slit = create_slit_table(numa_nodes); - let slit_offset = srat_offset.checked_add(srat.len() as u64).unwrap(); + let slit_addr = srat_addr.checked_add(srat.len() as u64).unwrap(); tables_bytes.extend_from_slice(slit.as_slice()); - tables.push(slit_offset.0); + tables.push(slit_addr.0); prev_tbl_len = slit.len() as u64; - prev_tbl_off = slit_offset; + prev_tbl_addr = slit_addr; }; #[cfg(target_arch = "aarch64")] { let iort = create_iort_table(device_manager.lock().unwrap().pci_segments()); - let iort_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); + let iort_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(iort.as_slice()); - tables.push(iort_offset.0); + tables.push(iort_addr.0); prev_tbl_len = iort.len() as u64; - prev_tbl_off = iort_offset; + prev_tbl_addr = iort_addr; } // VIOT @@ -777,11 +777,11 @@ fn create_acpi_tables_internal( { let viot = create_viot_table(iommu_bdf, devices_bdf); - let viot_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); + let viot_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(viot.as_slice()); - tables.push(viot_offset.0); + tables.push(viot_addr.0); prev_tbl_len = viot.len() as u64; - prev_tbl_off = viot_offset; + prev_tbl_addr = viot_addr; } // XSDT @@ -790,11 +790,11 @@ fn create_acpi_tables_internal( xsdt.append(*table); } xsdt.update_checksum(); - let xsdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap(); + let xsdt_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(xsdt.as_slice()); // RSDP - let rsdp = Rsdp::new(*b"CLOUDH", xsdt_offset.0); + let rsdp = Rsdp::new(*b"CLOUDH", xsdt_addr.0); (rsdp, tables_bytes, tables) } @@ -810,11 +810,11 @@ pub fn create_acpi_tables( trace_scoped!("create_acpi_tables"); let start_time = Instant::now(); - let rsdp_offset = arch::layout::RSDP_POINTER; - let dsdt_offset = rsdp_offset.checked_add(Rsdp::len() as u64).unwrap(); + let rsdp_addr = arch::layout::RSDP_POINTER; + let dsdt_addr = rsdp_addr.checked_add(Rsdp::len() as 
u64).unwrap(); - let (rsdp, tables_bytes, _tables_offset) = create_acpi_tables_internal( - dsdt_offset, + let (rsdp, tables_bytes, _tables_addr) = create_acpi_tables_internal( + dsdt_addr, device_manager, cpu_manager, memory_manager, @@ -823,11 +823,11 @@ pub fn create_acpi_tables( ); guest_mem - .write_slice(rsdp.as_bytes(), rsdp_offset) + .write_slice(rsdp.as_bytes(), rsdp_addr) .expect("Error writing RSDP"); guest_mem - .write_slice(tables_bytes.as_slice(), dsdt_offset) + .write_slice(tables_bytes.as_slice(), dsdt_addr) .expect("Error writing ACPI tables"); info!( @@ -836,7 +836,7 @@ pub fn create_acpi_tables( Rsdp::len() + tables_bytes.len(), ); - rsdp_offset + rsdp_addr } #[cfg(feature = "tdx")] From 394fd230b06d0ddd832677daae19f6e3f3fd8085 Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Mon, 28 Jul 2025 23:18:31 +0000 Subject: [PATCH 060/294] vmm: acpi: Clarify the return of 'create_acpi_tables_internal' Signed-off-by: Bo Chen --- vmm/src/acpi.rs | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/vmm/src/acpi.rs b/vmm/src/acpi.rs index 9d7106b8f7..2185ee8b32 100644 --- a/vmm/src/acpi.rs +++ b/vmm/src/acpi.rs @@ -626,6 +626,13 @@ fn create_viot_table(iommu_bdf: &PciBdf, devices_bdf: &[PciBdf]) -> Sdt { viot } +// Generate ACPI tables based on the given DSDT address +// +// # Returns +// +// * `Rsdp` is the generated RSDP. +// * `Vec` contains the generated bytes for ACPI tables. +// * `Vec` contains a list of table pointers stored in XSDT. fn create_acpi_tables_internal( dsdt_addr: GuestAddress, device_manager: &Arc>, @@ -636,7 +643,8 @@ fn create_acpi_tables_internal( ) -> (Rsdp, Vec, Vec) { // Generated bytes for ACPI tables let mut tables_bytes: Vec = Vec::new(); - let mut tables: Vec = Vec::new(); + // List of table pointers stored in XSDT + let mut xsdt_table_pointers: Vec = Vec::new(); // DSDT let dsdt = create_dsdt_table(device_manager, cpu_manager, memory_manager); @@ -646,13 +654,13 @@ fn create_acpi_tables_internal( let facp = create_facp_table(dsdt_addr, device_manager); let facp_addr = dsdt_addr.checked_add(dsdt.len() as u64).unwrap(); tables_bytes.extend_from_slice(facp.as_slice()); - tables.push(facp_addr.0); + xsdt_table_pointers.push(facp_addr.0); // MADT let madt = cpu_manager.lock().unwrap().create_madt(); let madt_addr = facp_addr.checked_add(facp.len() as u64).unwrap(); tables_bytes.extend_from_slice(madt.as_slice()); - tables.push(madt_addr.0); + xsdt_table_pointers.push(madt_addr.0); let mut prev_tbl_len = madt.len() as u64; let mut prev_tbl_addr = madt_addr; @@ -662,7 +670,7 @@ fn create_acpi_tables_internal( let pptt = cpu_manager.lock().unwrap().create_pptt(); let pptt_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(pptt.as_slice()); - tables.push(pptt_addr.0); + xsdt_table_pointers.push(pptt_addr.0); prev_tbl_len = pptt.len() as u64; prev_tbl_addr = pptt_addr; } @@ -673,7 +681,7 @@ fn create_acpi_tables_internal( let gtdt = create_gtdt_table(); let gtdt_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(gtdt.as_slice()); - tables.push(gtdt_addr.0); + xsdt_table_pointers.push(gtdt_addr.0); prev_tbl_len = gtdt.len() as u64; prev_tbl_addr = gtdt_addr; } @@ -682,7 +690,7 @@ fn create_acpi_tables_internal( let mcfg = create_mcfg_table(device_manager.lock().unwrap().pci_segments()); let mcfg_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(mcfg.as_slice()); - tables.push(mcfg_addr.0); + 
xsdt_table_pointers.push(mcfg_addr.0); prev_tbl_len = mcfg.len() as u64; prev_tbl_addr = mcfg_addr; @@ -714,7 +722,7 @@ fn create_acpi_tables_internal( let spcr = create_spcr_table(serial_device_addr, serial_device_irq); let spcr_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(spcr.as_slice()); - tables.push(spcr_addr.0); + xsdt_table_pointers.push(spcr_addr.0); prev_tbl_len = spcr.len() as u64; prev_tbl_addr = spcr_addr; @@ -722,7 +730,7 @@ fn create_acpi_tables_internal( let dbg2 = create_dbg2_table(serial_device_addr); let dbg2_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(dbg2.as_slice()); - tables.push(dbg2_addr.0); + xsdt_table_pointers.push(dbg2_addr.0); prev_tbl_len = dbg2.len() as u64; prev_tbl_addr = dbg2_addr; } @@ -732,7 +740,7 @@ fn create_acpi_tables_internal( let tpm2 = create_tpm2_table(); let tpm2_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(tpm2.as_slice()); - tables.push(tpm2_addr.0); + xsdt_table_pointers.push(tpm2_addr.0); prev_tbl_len = tpm2.len() as u64; prev_tbl_addr = tpm2_addr; @@ -750,13 +758,13 @@ fn create_acpi_tables_internal( ); let srat_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(srat.as_slice()); - tables.push(srat_addr.0); + xsdt_table_pointers.push(srat_addr.0); // SLIT let slit = create_slit_table(numa_nodes); let slit_addr = srat_addr.checked_add(srat.len() as u64).unwrap(); tables_bytes.extend_from_slice(slit.as_slice()); - tables.push(slit_addr.0); + xsdt_table_pointers.push(slit_addr.0); prev_tbl_len = slit.len() as u64; prev_tbl_addr = slit_addr; @@ -767,7 +775,7 @@ fn create_acpi_tables_internal( let iort = create_iort_table(device_manager.lock().unwrap().pci_segments()); let iort_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(iort.as_slice()); - tables.push(iort_addr.0); + xsdt_table_pointers.push(iort_addr.0); prev_tbl_len = iort.len() as u64; prev_tbl_addr = iort_addr; } @@ -779,15 +787,15 @@ fn create_acpi_tables_internal( let viot_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(viot.as_slice()); - tables.push(viot_addr.0); + xsdt_table_pointers.push(viot_addr.0); prev_tbl_len = viot.len() as u64; prev_tbl_addr = viot_addr; } // XSDT let mut xsdt = Sdt::new(*b"XSDT", 36, 1, *b"CLOUDH", *b"CHXSDT ", 1); - for table in &tables { - xsdt.append(*table); + for table_pointer in &xsdt_table_pointers { + xsdt.append(*table_pointer); } xsdt.update_checksum(); let xsdt_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); @@ -796,7 +804,7 @@ fn create_acpi_tables_internal( // RSDP let rsdp = Rsdp::new(*b"CLOUDH", xsdt_addr.0); - (rsdp, tables_bytes, tables) + (rsdp, tables_bytes, xsdt_table_pointers) } pub fn create_acpi_tables( @@ -813,7 +821,7 @@ pub fn create_acpi_tables( let rsdp_addr = arch::layout::RSDP_POINTER; let dsdt_addr = rsdp_addr.checked_add(Rsdp::len() as u64).unwrap(); - let (rsdp, tables_bytes, _tables_addr) = create_acpi_tables_internal( + let (rsdp, tables_bytes, _xsdt_table_pointers) = create_acpi_tables_internal( dsdt_addr, device_manager, cpu_manager, From cd2c43b4896670cc5bd7ef60a12732c81fd34bdd Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Wed, 6 Aug 2025 16:41:29 +0800 Subject: [PATCH 061/294] misc: Fix beta clippy errors Fix clippy error: "error: manual implementation of `.is_multiple_of() `" from rustc 1.90.0-beta.1 (788da80fc 2025-08-04). 
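For reference, the before/after shape of the lint fix above as a minimal sketch; `u64::is_multiple_of` was stabilized in Rust 1.87.0, hence the MSRV bump later in this series:

    fn main() {
        let addr: u64 = 0x4000_0000;
        let block_size: u64 = 0x20_0000;

        // Manual form the lint flags:
        let aligned_manual = addr % block_size == 0;
        // Library form preferred by clippy:
        let aligned_lib = addr.is_multiple_of(block_size);

        assert_eq!(aligned_manual, aligned_lib);
        println!("aligned = {aligned_lib}");
    }

The two forms are equivalent for non-zero divisors; the library form states the intent directly, which is why the diff below is a mechanical substitution.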
Signed-off-by: Songqian Li --- arch/src/riscv64/fdt.rs | 5 +---- block/src/lib.rs | 2 +- block/src/qcow/mod.rs | 4 ++-- block/src/qcow/raw_file.rs | 6 +++--- hypervisor/src/kvm/aarch64/gic/dist_regs.rs | 2 +- virtio-devices/src/balloon.rs | 2 +- virtio-devices/src/mem.rs | 14 +++++++------- virtio-devices/src/net.rs | 2 +- virtio-devices/src/vhost_user/net.rs | 2 +- vm-allocator/src/address.rs | 2 +- vm-migration/src/protocol.rs | 2 +- vmm/src/igvm/igvm_loader.rs | 6 +++--- vmm/src/memory_manager.rs | 2 +- 13 files changed, 24 insertions(+), 27 deletions(-) diff --git a/arch/src/riscv64/fdt.rs b/arch/src/riscv64/fdt.rs index 1a7e2e5f46..580aaa7d3f 100644 --- a/arch/src/riscv64/fdt.rs +++ b/arch/src/riscv64/fdt.rs @@ -448,10 +448,7 @@ fn print_node(node: fdt_parser::node::FdtNode<'_, '_>, n_spaces: usize) { // - At first, try to convert it to CStr and print, // - If failed, print it as u32 array. let value_result = match CStr::from_bytes_with_nul(value) { - Ok(value_cstr) => match value_cstr.to_str() { - Ok(value_str) => Some(value_str), - Err(_e) => None, - }, + Ok(value_cstr) => value_cstr.to_str().ok(), Err(_e) => None, }; diff --git a/block/src/lib.rs b/block/src/lib.rs index aed9c0ab3c..4e10771c4d 100644 --- a/block/src/lib.rs +++ b/block/src/lib.rs @@ -431,7 +431,7 @@ impl Request { // In case it's not properly aligned, an intermediate buffer is // created with the correct alignment, and a copy from/to the // origin buffer is performed, depending on the type of operation. - let iov_base = if (origin_ptr.as_ptr() as u64) % SECTOR_SIZE != 0 { + let iov_base = if !(origin_ptr.as_ptr() as u64).is_multiple_of(SECTOR_SIZE) { let layout = Layout::from_size_align(data_len, SECTOR_SIZE as usize).unwrap(); // SAFETY: layout has non-zero size let aligned_ptr = unsafe { alloc_zeroed(layout) }; diff --git a/block/src/qcow/mod.rs b/block/src/qcow/mod.rs index 7733ff0ceb..4d84918e97 100644 --- a/block/src/qcow/mod.rs +++ b/block/src/qcow/mod.rs @@ -1705,12 +1705,12 @@ fn offset_is_cluster_boundary(offset: u64, cluster_bits: u32) -> Result<()> { // Ceiling of the division of `dividend`/`divisor`. fn div_round_up_u64(dividend: u64, divisor: u64) -> u64 { - dividend / divisor + u64::from(dividend % divisor != 0) + dividend / divisor + u64::from(!dividend.is_multiple_of(divisor)) } // Ceiling of the division of `dividend`/`divisor`. 
fn div_round_up_u32(dividend: u32, divisor: u32) -> u32 { - dividend / divisor + u32::from(dividend % divisor != 0) + dividend / divisor + u32::from(!dividend.is_multiple_of(divisor)) } fn convert_copy(reader: &mut R, writer: &mut W, offset: u64, size: u64) -> Result<()> diff --git a/block/src/qcow/raw_file.rs b/block/src/qcow/raw_file.rs index cb96376015..67bc99fca6 100644 --- a/block/src/qcow/raw_file.rs +++ b/block/src/qcow/raw_file.rs @@ -89,9 +89,9 @@ impl RawFile { let align64: u64 = self.alignment.try_into().unwrap(); - (self.position % align64 == 0) - && ((buf.as_ptr() as usize) % self.alignment == 0) - && (buf.len() % self.alignment == 0) + self.position.is_multiple_of(align64) + && (buf.as_ptr() as usize).is_multiple_of(self.alignment) + && buf.len().is_multiple_of(self.alignment) } pub fn set_len(&self, size: u64) -> std::io::Result<()> { diff --git a/hypervisor/src/kvm/aarch64/gic/dist_regs.rs b/hypervisor/src/kvm/aarch64/gic/dist_regs.rs index 9135ad0031..af2d6022c4 100644 --- a/hypervisor/src/kvm/aarch64/gic/dist_regs.rs +++ b/hypervisor/src/kvm/aarch64/gic/dist_regs.rs @@ -156,7 +156,7 @@ fn compute_reg_len(gic: &DeviceFd, reg: &DistReg, base: u32) -> Result { // that the model has. It is also the type of register where // a register relates to multiple interrupts. end = base + (reg.bpi as u32 * (num_irq - LAYOUT_IRQ_BASE) / 8); - if reg.bpi as u32 * (num_irq - LAYOUT_IRQ_BASE) % 8 > 0 { + if !(reg.bpi as u32 * (num_irq - LAYOUT_IRQ_BASE)).is_multiple_of(8) { end += REG_SIZE as u32; } } diff --git a/virtio-devices/src/balloon.rs b/virtio-devices/src/balloon.rs index 890dfbd9fc..2a34b6688f 100644 --- a/virtio-devices/src/balloon.rs +++ b/virtio-devices/src/balloon.rs @@ -265,7 +265,7 @@ impl BalloonEpollHandler { error!("The head contains the request type is not right"); return Err(Error::UnexpectedWriteOnlyDescriptor); } - if desc.len() as usize % data_chunk_size != 0 { + if !(desc.len() as usize).is_multiple_of(data_chunk_size) { error!("the request size {} is not right", desc.len()); return Err(Error::InvalidRequest); } diff --git a/virtio-devices/src/mem.rs b/virtio-devices/src/mem.rs index 5739a12fae..8d5830ac3c 100644 --- a/virtio-devices/src/mem.rs +++ b/virtio-devices/src/mem.rs @@ -193,35 +193,35 @@ unsafe impl ByteValued for VirtioMemConfig {} impl VirtioMemConfig { fn validate(&self) -> result::Result<(), Error> { - if self.addr % self.block_size != 0 { + if !self.addr.is_multiple_of(self.block_size) { return Err(Error::ValidateError(anyhow!( "addr 0x{:x} is not aligned on block_size 0x{:x}", self.addr, self.block_size ))); } - if self.region_size % self.block_size != 0 { + if !self.region_size.is_multiple_of(self.block_size) { return Err(Error::ValidateError(anyhow!( "region_size 0x{:x} is not aligned on block_size 0x{:x}", self.region_size, self.block_size ))); } - if self.usable_region_size % self.block_size != 0 { + if !self.usable_region_size.is_multiple_of(self.block_size) { return Err(Error::ValidateError(anyhow!( "usable_region_size 0x{:x} is not aligned on block_size 0x{:x}", self.usable_region_size, self.block_size ))); } - if self.plugged_size % self.block_size != 0 { + if !self.plugged_size.is_multiple_of(self.block_size) { return Err(Error::ValidateError(anyhow!( "plugged_size 0x{:x} is not aligned on block_size 0x{:x}", self.plugged_size, self.block_size ))); } - if self.requested_size % self.block_size != 0 { + if !self.requested_size.is_multiple_of(self.block_size) { return Err(Error::ValidateError(anyhow!( "requested_size 0x{:x} is not aligned on 
block_size 0x{:x}", self.requested_size, @@ -244,7 +244,7 @@ impl VirtioMemConfig { size, self.region_size ))); - } else if size % self.block_size != 0 { + } else if !size.is_multiple_of(self.block_size) { return Err(Error::ResizeError(anyhow!( "new size 0x{:x} is not aligned on block_size 0x{:x}", size, @@ -267,7 +267,7 @@ impl VirtioMemConfig { // Start address must be aligned on block_size, the size must be // greater than 0, and all blocks covered by the request must be // in the usable region. - if addr % self.block_size != 0 + if !addr.is_multiple_of(self.block_size) || size == 0 || (addr < self.addr || addr + size > self.addr + self.usable_region_size) { diff --git a/virtio-devices/src/net.rs b/virtio-devices/src/net.rs index 7d2a4a4597..950cedb519 100644 --- a/virtio-devices/src/net.rs +++ b/virtio-devices/src/net.rs @@ -706,7 +706,7 @@ impl VirtioDevice for Net { let num_queues = queues.len(); let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into()); - if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 { + if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && !num_queues.is_multiple_of(2) { let ctrl_queue_index = num_queues - 1; let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index); diff --git a/virtio-devices/src/vhost_user/net.rs b/virtio-devices/src/vhost_user/net.rs index 930f557419..c52d5ca385 100644 --- a/virtio-devices/src/vhost_user/net.rs +++ b/virtio-devices/src/vhost_user/net.rs @@ -298,7 +298,7 @@ impl VirtioDevice for Net { let num_queues = queues.len(); let event_idx = self.common.feature_acked(VIRTIO_RING_F_EVENT_IDX.into()); - if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && num_queues % 2 != 0 { + if self.common.feature_acked(VIRTIO_NET_F_CTRL_VQ.into()) && !num_queues.is_multiple_of(2) { let ctrl_queue_index = num_queues - 1; let (_, mut ctrl_queue, ctrl_queue_evt) = queues.remove(ctrl_queue_index); diff --git a/vm-allocator/src/address.rs b/vm-allocator/src/address.rs index a6e11dc63a..9a72afdf98 100644 --- a/vm-allocator/src/address.rs +++ b/vm-allocator/src/address.rs @@ -68,7 +68,7 @@ impl AddressAllocator { } fn align_address(&self, address: GuestAddress, alignment: GuestUsize) -> GuestAddress { - let align_adjust = if address.raw_value() % alignment != 0 { + let align_adjust = if !address.raw_value().is_multiple_of(alignment) { alignment - (address.raw_value() % alignment) } else { 0 diff --git a/vm-migration/src/protocol.rs b/vm-migration/src/protocol.rs index 274baf0397..094a8c1a30 100644 --- a/vm-migration/src/protocol.rs +++ b/vm-migration/src/protocol.rs @@ -264,7 +264,7 @@ impl MemoryRangeTable { } pub fn read_from(fd: &mut dyn Read, length: u64) -> Result { - assert!(length as usize % std::mem::size_of::() == 0); + assert!((length as usize).is_multiple_of(size_of::())); let mut data: Vec = Vec::new(); data.resize_with( diff --git a/vmm/src/igvm/igvm_loader.rs b/vmm/src/igvm/igvm_loader.rs index 75eeb33582..805bb94b41 100644 --- a/vmm/src/igvm/igvm_loader.rs +++ b/vmm/src/igvm/igvm_loader.rs @@ -66,8 +66,8 @@ enum ParameterAreaState { #[cfg(feature = "sev_snp")] fn igvm_memmap_from_ram_range(ram_range: (u64, u64)) -> IGVM_VHS_MEMORY_MAP_ENTRY { - assert!(ram_range.0 % HV_PAGE_SIZE == 0); - assert!((ram_range.1 - ram_range.0) % HV_PAGE_SIZE == 0); + assert!(ram_range.0.is_multiple_of(HV_PAGE_SIZE)); + assert!((ram_range.1 - ram_range.0).is_multiple_of(HV_PAGE_SIZE)); IGVM_VHS_MEMORY_MAP_ENTRY { starting_gpa_page_number: ram_range.0 / HV_PAGE_SIZE, @@ -179,7 +179,7 @@ 
pub fn load_igvm( data_type, data, } => { - debug_assert!(data.len() as u64 % HV_PAGE_SIZE == 0); + debug_assert!((data.len() as u64).is_multiple_of(HV_PAGE_SIZE)); // TODO: only 4k or empty page data supported right now assert!(data.len() as u64 == HV_PAGE_SIZE || data.is_empty()); diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index a5ab297182..2aa8e51046 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -1700,7 +1700,7 @@ impl MemoryManager { } // "Inserted" DIMM must have a size that is a multiple of 128MiB - if size % (128 << 20) != 0 { + if !size.is_multiple_of(128 << 20) { return Err(Error::InvalidSize); } From 530719a57a3556dcfac069e1f5091ba7dfda5f6d Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Wed, 6 Aug 2025 17:29:56 +0800 Subject: [PATCH 062/294] build: Bump MSRV to 1.87.0 rustc 1.90.0-beta.1 (788da80fc 2025-08-04) suggests using library feature `unsigned_is_multiple_of`. It is stabled in Rust 1.87.0. Update image to 20250807-0 since MSRV in Dockerfile is updated. Signed-off-by: Songqian Li Signed-off-by: Bo Chen --- .github/workflows/build.yaml | 2 +- .github/workflows/docker-image.yaml | 2 +- .github/workflows/preview-riscv64.yaml | 2 +- .github/workflows/release.yaml | 2 +- Cargo.toml | 2 +- resources/Dockerfile | 2 +- scripts/dev_cli.sh | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 070650ba6e..297b2e153d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -15,7 +15,7 @@ jobs: - stable - beta - nightly - - "1.83.0" + - "1.87.0" target: - x86_64-unknown-linux-gnu - x86_64-unknown-linux-musl diff --git a/.github/workflows/docker-image.yaml b/.github/workflows/docker-image.yaml index 6891d60997..b5bbdf4284 100644 --- a/.github/workflows/docker-image.yaml +++ b/.github/workflows/docker-image.yaml @@ -41,7 +41,7 @@ jobs: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} # generate Docker tags based on the following events/attributes tags: | - type=raw,value=20250412-0 + type=raw,value=20250807-0 type=sha - name: Build and push diff --git a/.github/workflows/preview-riscv64.yaml b/.github/workflows/preview-riscv64.yaml index 84435402a8..6e4c5071e3 100644 --- a/.github/workflows/preview-riscv64.yaml +++ b/.github/workflows/preview-riscv64.yaml @@ -24,7 +24,7 @@ jobs: fetch-depth: 0 - name: Install Rust toolchain - run: /opt/scripts/exec-in-qemu.sh rustup default 1.83.0 + run: /opt/scripts/exec-in-qemu.sh rustup default 1.87.0 - name: Build ${{ matrix.module }} Module (kvm) run: /opt/scripts/exec-in-qemu.sh cargo rustc --locked -p ${{ matrix.module }} --no-default-features --features "kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 01d4d6d810..ef1eb3573c 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -45,7 +45,7 @@ jobs: target: ${{ matrix.platform.target }} args: ${{ matrix.platform.args }} strip: true - toolchain: "1.83.0" + toolchain: "1.87.0" - name: Copy Release Binaries if: github.event_name == 'create' && github.event.ref_type == 'tag' shell: bash diff --git a/Cargo.toml b/Cargo.toml index 4e1848010f..c19c070769 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ version = "47.0.0" # a.) A dependency requires it, # b.) If we want to use a new feature and that MSRV is at least 6 months old, # c.) 
There is a security issue that is addressed by the toolchain update. -rust-version = "1.83.0" +rust-version = "1.87.0" [profile.release] codegen-units = 1 diff --git a/resources/Dockerfile b/resources/Dockerfile index ca527857f6..a650c48211 100644 --- a/resources/Dockerfile +++ b/resources/Dockerfile @@ -8,7 +8,7 @@ FROM ubuntu:24.04 AS dev ARG TARGETARCH -ARG RUST_TOOLCHAIN="1.83.0" +ARG RUST_TOOLCHAIN="1.87.0" ARG CLH_SRC_DIR="/cloud-hypervisor" ARG CLH_BUILD_DIR="$CLH_SRC_DIR/build" ARG CARGO_REGISTRY_DIR="$CLH_BUILD_DIR/cargo_registry" diff --git a/scripts/dev_cli.sh b/scripts/dev_cli.sh index db560d529e..3349afa9d8 100755 --- a/scripts/dev_cli.sh +++ b/scripts/dev_cli.sh @@ -9,7 +9,7 @@ CLI_NAME="Cloud Hypervisor" CTR_IMAGE_TAG="ghcr.io/cloud-hypervisor/cloud-hypervisor" # Needs to match explicit version in docker-image.yaml workflow -CTR_IMAGE_VERSION="20250412-0" +CTR_IMAGE_VERSION="20250807-0" : "${CTR_IMAGE:=${CTR_IMAGE_TAG}:${CTR_IMAGE_VERSION}}" DOCKER_RUNTIME="docker" From 0bfa1603602e393a7c7849544a7c8f86ca787c4b Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Thu, 7 Aug 2025 11:46:23 +0800 Subject: [PATCH 063/294] hypervisor: Replacing pdf link with new available link Replacing pdf link with new available link to fix the failed CI. Signed-off-by: Songqian Li --- hypervisor/src/kvm/aarch64/gic/dist_regs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypervisor/src/kvm/aarch64/gic/dist_regs.rs b/hypervisor/src/kvm/aarch64/gic/dist_regs.rs index af2d6022c4..0aa3da76de 100644 --- a/hypervisor/src/kvm/aarch64/gic/dist_regs.rs +++ b/hypervisor/src/kvm/aarch64/gic/dist_regs.rs @@ -11,7 +11,7 @@ use crate::kvm::kvm_bindings::{ /* Distributor registers as detailed at page 456 from - https://static.docs.arm.com/ihi0069/c/IHI0069C_gic_architecture_specification.pdf. + https://developer.arm.com/documentation/ihi0069/c/?lang=en. Address offsets are relative to the Distributor base address defined by the system memory map. Unless otherwise stated in the register description, all GIC registers are 32-bits wide. From 4f4940b02ac55f1fb48d9ca255ea7c4a7643eceb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 7 Aug 2025 23:35:01 +0000 Subject: [PATCH 064/294] build: Bump crate-ci/typos from 1.35.1 to 1.35.2 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.35.1 to 1.35.2. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.35.1...v1.35.2) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-version: 1.35.2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/quality.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 81ad38b626..b02daf8124 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -152,4 +152,4 @@ jobs: steps: - uses: actions/checkout@v4 # Executes "typos ." - - uses: crate-ci/typos@v1.35.1 + - uses: crate-ci/typos@v1.35.2 From 0d9a941a2cc60ea397a19709e246cf8b21172d2b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 7 Aug 2025 22:04:48 +0000 Subject: [PATCH 065/294] build: Bump redox_users from 0.5.0 to 0.5.2 Bumps redox_users from 0.5.0 to 0.5.2. 
--- updated-dependencies: - dependency-name: redox_users dependency-version: 0.5.2 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1a0499d850..5a372a29d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1765,9 +1765,9 @@ dependencies = [ [[package]] name = "redox_users" -version = "0.5.0" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ "getrandom 0.2.15", "libredox", From f87772cd367ba2b9b5ebac5e4070878879cd948e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 8 Aug 2025 23:31:16 +0000 Subject: [PATCH 066/294] build: Bump crate-ci/typos from 1.35.2 to 1.35.3 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.35.2 to 1.35.3. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.35.2...v1.35.3) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-version: 1.35.3 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/quality.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index b02daf8124..b4cddb3ce9 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -152,4 +152,4 @@ jobs: steps: - uses: actions/checkout@v4 # Executes "typos ." - - uses: crate-ci/typos@v1.35.2 + - uses: crate-ci/typos@v1.35.3 From e718e0ad36da8147144a225b774a2420ebf70329 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 8 Aug 2025 23:37:02 +0000 Subject: [PATCH 067/294] build: Bump async-trait from 0.1.86 to 0.1.88 Bumps [async-trait](https://github.com/dtolnay/async-trait) from 0.1.86 to 0.1.88. - [Release notes](https://github.com/dtolnay/async-trait/releases) - [Commits](https://github.com/dtolnay/async-trait/compare/0.1.86...0.1.88) --- updated-dependencies: - dependency-name: async-trait dependency-version: 0.1.88 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a372a29d6..8173ec1fd1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -244,9 +244,9 @@ checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" [[package]] name = "async-trait" -version = "0.1.86" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", From 684fb1dfce04f877dd91efc0319f308ae158b5cb Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Fri, 8 Aug 2025 11:34:03 +0800 Subject: [PATCH 068/294] ci: Improve link checker conditions Link checker excludes checks on local links and links with variables. 
Signed-off-by: Songqian Li --- .lychee.toml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.lychee.toml b/.lychee.toml index 9eb8f9fdf7..875a861826 100644 --- a/.lychee.toml +++ b/.lychee.toml @@ -14,8 +14,16 @@ exclude = [ # OSDev has added bot protection and accesses my result in 403 Forbidden. '^https://wiki.osdev.org', + # Exclude all pages with $ in the URL since $XXX is a variable + "\\$.*", + # Exclude local files + "file://.*", ] +# Exclude loopback addresses +exclude_loopback = true + + max_retries = 3 retry_wait_time = 5 From a637940be73376cedd691a4c79d44d623ae85081 Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Fri, 8 Aug 2025 11:39:05 +0800 Subject: [PATCH 069/294] ci: Fix link check failed ### Errors in src/main.rs * [404] | Rejected status code (this depends on your "accept" configuration) : Not Found Signed-off-by: Songqian Li --- src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index 101da1706b..56b48d59b5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -553,10 +553,10 @@ fn start_vmm(cmd_arguments: ArgMatches) -> Result, Error> { // handler safe functions (writing to stderr) and manipulating signals. unsafe { signal_hook::low_level::register(signal_hook::consts::SIGSYS, || { - eprint!( + eprintln!( "\n==== Possible seccomp violation ====\n\ Try running with `strace -ff` to identify the cause and open an issue: \ - https://github.com/cloud-hypervisor/cloud-hypervisor/issues/new\n" + https://github.com/cloud-hypervisor/cloud-hypervisor/issues/new" ); signal_hook::low_level::emulate_default_handler(SIGSYS).unwrap(); }) From 9825397350aa027f847f6d3003b72df34a370563 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Fri, 25 Jul 2025 21:56:02 -0400 Subject: [PATCH 070/294] misc: option_parser: allow quoting special characters The option parser did not allow quoting any special characters, which means that some strings could not be passed via command-line options. Fix this by allowing double quotes to quote all the special characters, such as '@', ',', '[', and ']'. '"' can be escaped by doubling it. The implementation uses some tricks to stay modular. Signed-off-by: Demi Marie Obenour --- option_parser/src/lib.rs | 188 +++++++++++++++++++++++++++++---------- 1 file changed, 140 insertions(+), 48 deletions(-) diff --git a/option_parser/src/lib.rs b/option_parser/src/lib.rs index f83524a20b..ff3d9ffffd 100644 --- a/option_parser/src/lib.rs +++ b/option_parser/src/lib.rs @@ -9,6 +9,23 @@ use std::str::FromStr; use thiserror::Error; +mod private_trait { + // Voldemort trait that dispatches to `FromStr::from_str` on externally-defined types + // and to custom parsing code for types in this module. + pub trait Parseable + where + Self: Sized, + { + type Err; + // Actually does the parsing, but panics if the input doesn't have + // balanced quotes. This is fine because split_commas checks that the + // input has balanced quotes, and option names cannot contain anything + // that split_commas treats as special. 
+ fn from_str(input: &str) -> Result::Err>; + } +} +use private_trait::Parseable; + #[derive(Default)] pub struct OptionParser { options: HashMap, @@ -34,38 +51,34 @@ type OptionParserResult = std::result::Result; fn split_commas(s: &str) -> OptionParserResult> { let mut list: Vec = Vec::new(); - let mut opened_brackets = 0; + let mut opened_brackets = 0u64; let mut in_quotes = false; let mut current = String::new(); for c in s.trim().chars() { match c { - '[' => { - opened_brackets += 1; - current.push('['); - } + // In quotes, only '"' is special + '"' => in_quotes = !in_quotes, + _ if in_quotes => {} + '[' => opened_brackets += 1, ']' => { - opened_brackets -= 1; - if opened_brackets < 0 { + if opened_brackets < 1 { return Err(OptionParserError::InvalidSyntax(s.to_owned())); } - current.push(']'); + opened_brackets -= 1; } - '"' => in_quotes = !in_quotes, - ',' => { - if opened_brackets > 0 || in_quotes { - current.push(',') - } else { - list.push(current); - current = String::new(); - } + ',' if opened_brackets == 0 => { + list.push(current); + current = String::new(); + continue; } - c => current.push(c), - } + _ => {} + }; + current.push(c); } list.push(current); - if opened_brackets != 0 || in_quotes { + if in_quotes || opened_brackets != 0 { return Err(OptionParserError::InvalidSyntax(s.to_owned())); } @@ -86,7 +99,6 @@ impl OptionParser { for option in split_commas(input)?.iter() { let parts: Vec<&str> = option.splitn(2, '=').collect(); - match self.options.get_mut(parts[0]) { None => return Err(OptionParserError::UnknownOption(parts[0].to_owned())), Some(value) => { @@ -106,6 +118,12 @@ impl OptionParser { } pub fn add(&mut self, option: &str) -> &mut Self { + // Check that option=value has balanced + // quotes and brackets iff value does. + assert!( + !option.contains(['"', '[', ']', '=', ',']), + "forbidden character in option name" + ); self.options.insert( option.to_owned(), OptionParserValue { @@ -133,7 +151,13 @@ impl OptionParser { self.options .get(option) .and_then(|v| v.value.clone()) - .and_then(|s| if s.is_empty() { None } else { Some(s) }) + .and_then(|s| { + if s.is_empty() { + None + } else { + Some(dequote(&s)) + } + }) } pub fn is_set(&self, option: &str) -> bool { @@ -143,12 +167,18 @@ impl OptionParser { .is_some() } - pub fn convert(&self, option: &str) -> OptionParserResult> { - match self.get(option) { + pub fn convert(&self, option: &str) -> OptionParserResult> { + match self.options.get(option).and_then(|v| v.value.as_ref()) { None => Ok(None), - Some(v) => Ok(Some(v.parse().map_err(|_| { - OptionParserError::Conversion(option.to_owned(), v.to_owned()) - })?)), + Some(v) => { + Ok(if v.is_empty() { + None + } else { + Some(Parseable::from_str(v).map_err(|_| { + OptionParserError::Conversion(option.to_owned(), v.to_owned()) + })?) 
+ }) + } } } } @@ -161,7 +191,7 @@ pub enum ToggleParseError { InvalidValue(String), } -impl FromStr for Toggle { +impl Parseable for Toggle { type Err = ToggleParseError; fn from_str(s: &str) -> std::result::Result { @@ -216,7 +246,7 @@ pub enum IntegerListParseError { InvalidValue(String), } -impl FromStr for IntegerList { +impl Parseable for IntegerList { type Err = IntegerListParseError; fn from_str(s: &str) -> std::result::Result { @@ -300,6 +330,7 @@ impl TupleValue for Vec { } } +#[derive(PartialEq, Eq, Debug)] pub struct Tuple(pub Vec<(S, T)>); #[derive(Error, Debug)] @@ -314,31 +345,39 @@ pub enum TupleError { InvalidInteger(#[source] ParseIntError), } -impl FromStr for Tuple { +impl Parseable for Tuple { type Err = TupleError; fn from_str(s: &str) -> std::result::Result { let mut list: Vec<(S, T)> = Vec::new(); - let body = s .trim() .strip_prefix('[') .and_then(|s| s.strip_suffix(']')) .ok_or_else(|| TupleError::InvalidValue(s.to_string()))?; - let tuples_list = split_commas(body).map_err(TupleError::SplitOutsideBrackets)?; for tuple in tuples_list.iter() { - let items: Vec<&str> = tuple.split('@').collect(); - - if items.len() != 2 { - return Err(TupleError::InvalidValue((*tuple).to_string())); + let mut in_quotes = false; + let mut last_idx = 0; + let mut first_val = None; + for (idx, c) in tuple.as_bytes().iter().enumerate() { + match c { + b'"' => in_quotes = !in_quotes, + b'@' if !in_quotes => { + if last_idx != 0 { + return Err(TupleError::InvalidValue((*tuple).to_string())); + } + first_val = Some(&tuple[last_idx..idx]); + last_idx = idx + 1; + } + _ => {} + } } - - let item1 = items[0] - .parse::() - .map_err(|_| TupleError::InvalidValue(items[0].to_owned()))?; - let item2 = TupleValue::parse_value(items[1])?; - + let item1 = ::from_str( + first_val.ok_or(TupleError::InvalidValue((*tuple).to_string()))?, + ) + .map_err(|_| TupleError::InvalidValue(first_val.unwrap().to_owned()))?; + let item2 = TupleValue::parse_value(&tuple[last_idx..])?; list.push((item1, item2)); } @@ -355,16 +394,48 @@ pub enum StringListParseError { InvalidValue(String), } -impl FromStr for StringList { +fn dequote(s: &str) -> String { + let mut prev_byte = b'\0'; + let mut in_quotes = false; + let mut out: Vec = vec![]; + for i in s.bytes() { + if i == b'"' { + if prev_byte == b'"' && !in_quotes { + out.push(b'"'); + } + in_quotes = !in_quotes; + } else { + out.push(i); + } + prev_byte = i + } + assert!(!in_quotes, "split_commas didn't reject unbalanced quotes"); + // SAFETY: the non-ASCII bytes in the output are the same + // and in the same order as those in the input, so if the + // input is valid UTF-8 the output will be as well. + unsafe { String::from_utf8_unchecked(out) } +} + +impl Parseable for T +where + T: FromStr + Sized, +{ + type Err = ::Err; + fn from_str(s: &str) -> std::result::Result { + dequote(s).parse() + } +} + +impl Parseable for StringList { type Err = StringListParseError; fn from_str(s: &str) -> std::result::Result { - let string_list: Vec = s - .trim() - .trim_matches(|c| c == '[' || c == ']') - .split(',') - .map(|e| e.to_owned()) - .collect(); + let string_list: Vec = + split_commas(s.trim().trim_matches(|c| c == '[' || c == ']')) + .map_err(|_| StringListParseError::InvalidValue(s.to_owned()))? 
+ .iter() + .map(|e| e.to_owned()) + .collect(); Ok(StringList(string_list)) } @@ -385,6 +456,7 @@ mod tests { .add("topology") .add("cmdline"); + assert_eq!(split_commas("\"\"").unwrap(), vec!["\"\""]); parser.parse("size=128M,hanging_param").unwrap_err(); parser .parse("size=128M,too_many_equals=foo=bar") @@ -395,6 +467,8 @@ mod tests { assert_eq!(parser.get("size"), Some("128M".to_owned())); assert!(!parser.is_set("mergeable")); assert!(parser.is_set("size")); + parser.parse("size=").unwrap(); + assert!(parser.get("size").is_none()); parser.parse("size=128M,mergeable=on").unwrap(); assert_eq!(parser.get("size"), Some("128M".to_owned())); @@ -416,6 +490,14 @@ mod tests { parser.parse("topology=[").unwrap_err(); parser.parse("topology=[[[]]]]").unwrap_err(); + parser.parse("topology=[\"@\"\"b\"@[1,2]]").unwrap(); + assert_eq!( + parser + .convert::>>("topology") + .unwrap() + .unwrap(), + Tuple(vec![("@\"b".to_owned(), vec![1, 2])]) + ); parser.parse("cmdline=\"console=ttyS0,9600n8\"").unwrap(); assert_eq!( @@ -425,4 +507,14 @@ mod tests { parser.parse("cmdline=\"").unwrap_err(); parser.parse("cmdline=\"\"\"").unwrap_err(); } + + #[test] + fn parse_bytes() { + assert_eq!(::from_str("a=\"b\"").unwrap(), "a=b"); + } + + #[test] + fn check_dequote() { + assert_eq!(dequote("a\u{3b2}\"a\"\"\""), "a\u{3b2}a\"") + } } From 2524b015b8cdc4f95b459f3e3b78fc1e2508c97b Mon Sep 17 00:00:00 2001 From: Ruoqing He Date: Sun, 10 Aug 2025 23:54:37 +0000 Subject: [PATCH 071/294] arch: riscv: Introduce UEFI related constants Set UEFI_START and UEFI_SIZE for riscv64 layout. Signed-off-by: Ruoqing He --- arch/src/riscv64/layout.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/src/riscv64/layout.rs b/arch/src/riscv64/layout.rs index 40583301c1..3ef7eddf2c 100644 --- a/arch/src/riscv64/layout.rs +++ b/arch/src/riscv64/layout.rs @@ -44,16 +44,23 @@ // | | // | APLICs | // | | +// 4 MB +---------------------------------------------------------------+ +// | UEFI flash | // 0 GB +---------------------------------------------------------------+ // // use vm_memory::GuestAddress; +/// 0x0 ~ 0x40_0000 (4 MiB) is reserved to UEFI +/// UEFI binary size is required less than 3 MiB, reserving 4 MiB is enough. +pub const UEFI_START: GuestAddress = GuestAddress(0); +pub const UEFI_SIZE: u64 = 0x040_0000; + /// AIA related devices /// See https://elixir.bootlin.com/linux/v6.10/source/arch/riscv/include/uapi/asm/kvm.h -/// 0x0 ~ 0x0400_0000 (64 MiB) resides APLICs -pub const APLIC_START: GuestAddress = GuestAddress(0); +/// 0x40_0000 ~ 0x0400_0000 (64 MiB) resides APLICs +pub const APLIC_START: GuestAddress = GuestAddress(0x40_0000); pub const APLIC_SIZE: u64 = 0x4000; /// 0x0400_0000 ~ 0x0800_0000 (64 MiB) resides IMSICs From ef2bbe5012b778f5e9fe4f56752f79075965c7c8 Mon Sep 17 00:00:00 2001 From: Ruoqing He Date: Mon, 11 Aug 2025 00:01:03 +0000 Subject: [PATCH 072/294] arch: riscv: Introduce UEFI module Provide Error definitions and load_uefi to be referenced while loading firmware. Signed-off-by: Ruoqing He --- arch/src/riscv64/mod.rs | 2 ++ arch/src/riscv64/uefi.rs | 50 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 arch/src/riscv64/uefi.rs diff --git a/arch/src/riscv64/mod.rs b/arch/src/riscv64/mod.rs index a04cf9471f..128698961d 100644 --- a/arch/src/riscv64/mod.rs +++ b/arch/src/riscv64/mod.rs @@ -7,6 +7,8 @@ pub mod fdt; /// Layout for this riscv64 system. pub mod layout; +/// Module for loading UEFI binary. 
+pub mod uefi; use std::collections::HashMap; use std::fmt::Debug; diff --git a/arch/src/riscv64/uefi.rs b/arch/src/riscv64/uefi.rs new file mode 100644 index 0000000000..bd40e36ff0 --- /dev/null +++ b/arch/src/riscv64/uefi.rs @@ -0,0 +1,50 @@ +// Copyright 2020 Arm Limited (or its affiliates). All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::io::{Read, Seek, SeekFrom}; +use std::os::fd::AsFd; +use std::result; + +use thiserror::Error; +use vm_memory::{GuestAddress, GuestMemory}; + +/// Errors thrown while loading UEFI binary +#[derive(Debug, Error)] +pub enum Error { + /// Unable to seek to UEFI image start. + #[error("Unable to seek to UEFI image start")] + SeekUefiStart, + /// Unable to seek to UEFI image end. + #[error("Unable to seek to UEFI image end")] + SeekUefiEnd, + /// UEFI image too big. + #[error("UEFI image too big")] + UefiTooBig, + /// Unable to read UEFI image + #[error("Unable to read UEFI image")] + ReadUefiImage, +} +type Result = result::Result; + +pub fn load_uefi( + guest_mem: &M, + guest_addr: GuestAddress, + uefi_image: &mut F, +) -> Result<()> +where + F: Read + Seek + AsFd, +{ + let uefi_size = uefi_image + .seek(SeekFrom::End(0)) + .map_err(|_| Error::SeekUefiEnd)? as usize; + + // edk2 image on virtual platform is smaller than 3M + if uefi_size > 0x300000 { + return Err(Error::UefiTooBig); + } + uefi_image.rewind().map_err(|_| Error::SeekUefiStart)?; + guest_mem + .read_exact_volatile_from(guest_addr, &mut uefi_image.as_fd(), uefi_size) + .map_err(|_| Error::ReadUefiImage) +} From 2e0ec8095c78f0e15cd56fd7e8c935c1bb0f3753 Mon Sep 17 00:00:00 2001 From: Ruoqing He Date: Mon, 11 Aug 2025 00:07:43 +0000 Subject: [PATCH 073/294] vmm: Enable uefi_flash field for riscv64 uefi_flash field in memory_manager is required for uefi loading and booting, enable it for riscv64 architecture. Signed-off-by: Ruoqing He --- vmm/src/memory_manager.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index 2aa8e51046..cdc5810445 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -199,7 +199,7 @@ pub struct MemoryManager { guest_ram_mappings: Vec, pub acpi_address: Option, - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] uefi_flash: Option>, } @@ -1274,7 +1274,7 @@ impl MemoryManager { arch_mem_regions, ram_allocator, dynamic, - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] uefi_flash: None, thp: config.thp, }; @@ -2192,7 +2192,7 @@ impl MemoryManager { self.guest_ram_mappings.len() as u32 } - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] pub fn uefi_flash(&self) -> GuestMemoryAtomic { self.uefi_flash.as_ref().unwrap().clone() } From 0df4b1ac4f73e1c64ffc3813904d60e6966fb4c0 Mon Sep 17 00:00:00 2001 From: Ruoqing He Date: Mon, 11 Aug 2025 00:09:22 +0000 Subject: [PATCH 074/294] vmm: Define riscv64 UEFI Error Error::UefiLoad is required for load_firmware to propagate errors encountered, define it for riscv64. 
Signed-off-by: Ruoqing He --- vmm/src/vm.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 012ebef05e..60ad39bc2f 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -116,6 +116,10 @@ pub enum Error { #[error("Cannot load the UEFI binary in memory")] UefiLoad(#[source] arch::aarch64::uefi::Error), + #[cfg(target_arch = "riscv64")] + #[error("Cannot load the UEFI binary in memory")] + UefiLoad(#[source] arch::riscv64::uefi::Error), + #[error("Cannot load the initramfs into memory")] InitramfsLoad, From 17195e1a460e8978790f11eec80c5db3ed3f3d02 Mon Sep 17 00:00:00 2001 From: Ruoqing He Date: Mon, 11 Aug 2025 00:23:41 +0000 Subject: [PATCH 075/294] vmm: Enable firmware boot for riscv64 Implement firmware boot (UEFI boot) for riscv64 architecture. Signed-off-by: Ruoqing He --- vmm/src/vm.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 60ad39bc2f..e8354c5cd6 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -1085,6 +1085,15 @@ impl Vm { Ok(EntryPoint { entry_addr }) } + #[cfg(target_arch = "riscv64")] + fn load_firmware(mut firmware: &File, memory_manager: Arc>) -> Result<()> { + let uefi_flash = memory_manager.lock().as_ref().unwrap().uefi_flash(); + let mem = uefi_flash.memory(); + arch::riscv64::uefi::load_uefi(mem.deref(), arch::layout::UEFI_START, &mut firmware) + .map_err(Error::UefiLoad)?; + Ok(()) + } + #[cfg(target_arch = "riscv64")] fn load_kernel( firmware: Option, @@ -1108,17 +1117,17 @@ impl Vm { // If failed, retry to load it as UEFI binary. // As the UEFI binary is formatless, it must be the last option to try. Err(linux_loader::loader::Error::Pe(InvalidImageMagicNumber)) => { - // TODO: UEFI for riscv64 is scheduled to next stage. - unimplemented!() + Self::load_firmware(&kernel, memory_manager)?; + arch::layout::UEFI_START } Err(e) => { return Err(Error::KernelLoad(e)); } } } - (Some(_firmware), None) => { - // TODO: UEFI for riscv64 is scheduled to next stage. - unimplemented!() + (Some(firmware), None) => { + Self::load_firmware(&firmware, memory_manager)?; + arch::layout::UEFI_START } _ => return Err(Error::InvalidPayload), }; From 5226ceb9741d9328065f6cc47410283b93cc1144 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Mon, 11 Aug 2025 11:50:08 +0200 Subject: [PATCH 076/294] misc: switch error output from error! back to stderr This partially reverts ed8f347fe62edd33355ad771615296ff8edc8d33 from #7183 and 6277d7d5f20126945904fefdf5fb990bbcce5ae8 from #7201. # Output how it was merged for v47 (#7066) ``` Error: Cloud Hypervisor exited with the following chain of errors: 0: Error booting VM 1: The VM could not boot 2: Error manipulating firmware file 3: No such file or directory (os error 2) Debug Info: VmBoot(VmBoot(FirmwareFile(Os { code: 2, kind: NotFound, message: "No such file or directory" }))) ``` # Output after #7183 and #7201 ``` cloud-hypervisor: 31.385730ms:
ERROR:/home/pschuster/dev/cloud-hypervisor/src/lib.rs:27 -- Error: Cloud Hypervisor exited with the following chain of errors: cloud-hypervisor: 31.417961ms:
ERROR:/home/pschuster/dev/cloud-hypervisor/src/lib.rs:39 -- 0: Error booting VM cloud-hypervisor: 31.448078ms:
ERROR:/home/pschuster/dev/cloud-hypervisor/src/lib.rs:39 -- 1: The VM could not boot cloud-hypervisor: 31.486711ms:
ERROR:/home/pschuster/dev/cloud-hypervisor/src/lib.rs:39 -- 2: Error manipulating firmware file cloud-hypervisor: 31.513331ms:
ERROR:/home/pschuster/dev/cloud-hypervisor/src/lib.rs:39 -- 3: No such file or directory (os error 2) cloud-hypervisor: 31.548037ms:
ERROR:/home/pschuster/dev/cloud-hypervisor/src/lib.rs:44 -- cloud-hypervisor: 31.568045ms:
ERROR:/home/pschuster/dev/cloud-hypervisor/src/lib.rs:45 -- Debug Info: VmBoot(VmBoot(FirmwareFile(Os { code: 2, kind: NotFound, message: "No such file or directory" }))) ``` The "proper logger" has indeed the advantage that messages can be gracefully redirected to log files etc. However, this makes the error message hardly readable. Therefore, I propose to use error!() only for runtime errors messages but not a pretty-printed version of those. Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- src/lib.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b8065083de..836ed8e2ed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,12 +19,15 @@ pub fn cli_print_error_chain<'a>( &'a (dyn Error + 'static), ) -> Option, ) { - let msg = format!("Error: {component} exited with the following"); + // Debug info. + error!("Fatal error: {top_error:?}"); + + eprint!("Error: {component} exited with the following "); if top_error.source().is_none() { - error!("{msg} error:"); - error!(" {top_error}"); + eprintln!("error:"); + eprintln!(" {top_error}"); } else { - error!("{msg} chain of errors:"); + eprintln!("chain of errors:"); std::iter::successors(Some(top_error), |sub_error| { // Dereference necessary to mitigate rustc compiler bug. // See @@ -34,13 +37,10 @@ pub fn cli_print_error_chain<'a>( .for_each(|(level, error)| { // Special case: handling of HTTP Server responses in ch-remote if let Some(message) = display_modifier(level, 2, error) { - error!("{message}"); + eprintln!("{message}"); } else { - error!(" {level}: {error}"); + eprintln!(" {level}: {error}"); } }); } - - error!(""); - error!("Debug Info: {top_error:?}"); } From 777b7ee11eab34c4ce219861e0b26c8f13f4b1cb Mon Sep 17 00:00:00 2001 From: Alex Orozco Date: Mon, 24 Mar 2025 19:02:50 +0000 Subject: [PATCH 077/294] devices: Add fw_cfg device Here we add the fw_cfg device as a legacy device to the device manager. It is guarded behind a fw_cfg flag in vmm at creation of the DeviceManager. In this cl we implement the fw_cfg device with one function (signature). 
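For context, the device follows QEMU's selector/data register protocol: firmware writes a 16-bit item selector and then reads the item's bytes back one at a time through the data register. A minimal guest-side sketch is shown below; it is illustrative only, the `outw`/`inb` closures stand in for real port I/O, and the port numbers are the x86_64 ones defined by this patch.

```rust
// Illustrative sketch of the fw_cfg access pattern (not part of the patch).
// `outw` and `inb` are assumed port-I/O helpers, not Cloud Hypervisor APIs.
const FW_CFG_SELECTOR_PORT: u16 = 0x510;
const FW_CFG_DATA_PORT: u16 = 0x511;

fn read_fw_cfg_item(
    selector: u16,
    len: usize,
    outw: &mut dyn FnMut(u16, u16),
    inb: &mut dyn FnMut(u16) -> u8,
) -> Vec<u8> {
    // Writing the selector picks an item and resets the data offset to 0.
    outw(FW_CFG_SELECTOR_PORT, selector);
    // Each subsequent read of the data register returns the next byte of the item.
    (0..len).map(|_| inb(FW_CFG_DATA_PORT)).collect()
}
```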
Signed-off-by: Alex Orozco --- Cargo.lock | 1 + Cargo.toml | 1 + devices/Cargo.toml | 5 + devices/src/legacy/fw_cfg.rs | 309 +++++++++++++++++++++++++++++++++++ devices/src/legacy/mod.rs | 4 + docs/fw_cfg.md | 77 +++++++++ src/main.rs | 2 + vmm/Cargo.toml | 1 + vmm/src/device_manager.rs | 46 ++++++ vmm/src/lib.rs | 2 + vmm/src/vm.rs | 7 + 11 files changed, 455 insertions(+) create mode 100644 devices/src/legacy/fw_cfg.rs create mode 100644 docs/fw_cfg.md diff --git a/Cargo.lock b/Cargo.lock index 8173ec1fd1..f26735d3cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -542,6 +542,7 @@ dependencies = [ "vm-memory", "vm-migration", "vmm-sys-util", + "zerocopy 0.8.26", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index c19c070769..3fdfef690d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,6 +64,7 @@ wait-timeout = { workspace = true } dbus_api = ["vmm/dbus_api", "zbus"] default = ["io_uring", "kvm"] dhat-heap = ["dhat", "vmm/dhat-heap"] # For heap profiling +fw_cfg = ["vmm/fw_cfg"] guest_debug = ["vmm/guest_debug"] igvm = ["mshv", "vmm/igvm"] io_uring = ["vmm/io_uring"] diff --git a/devices/Cargo.toml b/devices/Cargo.toml index 0c32e468e7..dc025948cc 100644 --- a/devices/Cargo.toml +++ b/devices/Cargo.toml @@ -28,11 +28,16 @@ vm-memory = { workspace = true, features = [ ] } vm-migration = { path = "../vm-migration" } vmm-sys-util = { workspace = true } +zerocopy = { version = "0.8.26", features = [ + "alloc", + "derive", +], optional = true } [target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] arch = { path = "../arch" } [features] default = [] +fw_cfg = ["zerocopy"] kvm = ["arch/kvm"] pvmemcontrol = [] diff --git a/devices/src/legacy/fw_cfg.rs b/devices/src/legacy/fw_cfg.rs new file mode 100644 index 0000000000..8810c14353 --- /dev/null +++ b/devices/src/legacy/fw_cfg.rs @@ -0,0 +1,309 @@ +// Copyright 2025 Google LLC. 
+// +// SPDX-License-Identifier: Apache-2.0 +// + +/// Cloud Hypervisor implementation of Qemu's fw_cfg spec +/// https://www.qemu.org/docs/master/specs/fw_cfg.html +/// Linux kernel fw_cfg driver header +/// https://github.com/torvalds/linux/blob/master/include/uapi/linux/qemu_fw_cfg.h +/// Uploading files to the guest via fw_cfg is supported for all kernels 4.6+ w/ CONFIG_FW_CFG_SYSFS enabled +/// https://cateee.net/lkddb/web-lkddb/FW_CFG_SYSFS.html +/// No kernel requirement if above functionality is not required, +/// only firmware must implement mechanism to interact with this fw_cfg device +use std::{ + fs::File, + io::Result, + mem::size_of_val, + os::unix::fs::FileExt, + sync::{Arc, Barrier}, +}; + +use vm_device::BusDevice; +use vmm_sys_util::sock_ctrl_msg::IntoIovec; +use zerocopy::{FromBytes, IntoBytes}; + +#[cfg(target_arch = "x86_64")] +const PORT_FW_CFG_SELECTOR: u64 = 0x510; +#[cfg(target_arch = "x86_64")] +const PORT_FW_CFG_DATA: u64 = 0x511; +#[cfg(target_arch = "x86_64")] +const PORT_FW_CFG_DMA_HI: u64 = 0x514; +#[cfg(target_arch = "x86_64")] +const PORT_FW_CFG_DMA_LO: u64 = 0x518; +#[cfg(target_arch = "x86_64")] +pub const PORT_FW_CFG_BASE: u64 = 0x510; +#[cfg(target_arch = "x86_64")] +pub const PORT_FW_CFG_WIDTH: u64 = 0xc; +#[cfg(target_arch = "aarch64")] +const PORT_FW_CFG_SELECTOR: u64 = 0x9030008; +#[cfg(target_arch = "aarch64")] +const PORT_FW_CFG_DATA: u64 = 0x9030000; +#[cfg(target_arch = "aarch64")] +const PORT_FW_CFG_DMA_HI: u64 = 0x9030010; +#[cfg(target_arch = "aarch64")] +const PORT_FW_CFG_DMA_LO: u64 = 0x9030014; +#[cfg(target_arch = "aarch64")] +pub const PORT_FW_CFG_BASE: u64 = 0x9030000; +#[cfg(target_arch = "aarch64")] +pub const PORT_FW_CFG_WIDTH: u64 = 0x10; + +const FW_CFG_SIGNATURE: u16 = 0x00; +const FW_CFG_ID: u16 = 0x01; +const FW_CFG_FILE_DIR: u16 = 0x19; +const FW_CFG_KNOWN_ITEMS: usize = 0x20; + +pub const FW_CFG_FILE_FIRST: u16 = 0x20; +pub const FW_CFG_DMA_SIGNATURE: [u8; 8] = *b"QEMU CFG"; +// Reserved (must be enabled) +const FW_CFG_F_RESERVED: u8 = 1 << 0; +// DMA Toggle Bit (enabled by default) +const FW_CFG_F_DMA: u8 = 1 << 1; +pub const FW_CFG_FEATURE: [u8; 4] = [FW_CFG_F_RESERVED | FW_CFG_F_DMA, 0, 0, 0]; + +#[derive(Debug)] +pub enum FwCfgContent { + Bytes(Vec), + Slice(&'static [u8]), + File(u64, File), + U32(u32), +} + +impl Default for FwCfgContent { + fn default() -> Self { + FwCfgContent::Slice(&[]) + } +} + +impl FwCfgContent { + fn size(&self) -> Result { + let ret = match self { + FwCfgContent::Bytes(v) => v.len(), + FwCfgContent::File(offset, f) => (f.metadata()?.len() - offset) as usize, + FwCfgContent::Slice(s) => s.len(), + FwCfgContent::U32(n) => size_of_val(n), + }; + u32::try_from(ret).map_err(|_| std::io::ErrorKind::InvalidInput.into()) + } +} + +#[derive(Debug, Default)] +pub struct FwCfgItem { + pub name: String, + pub content: FwCfgContent, +} + +/// https://www.qemu.org/docs/master/specs/fw_cfg.html +#[derive(Debug, Default)] +pub struct FwCfg { + selector: u16, + data_offset: u32, + items: Vec, // 0x20 and above + known_items: [FwCfgContent; FW_CFG_KNOWN_ITEMS], // 0x0 to 0x19 +} + +#[repr(C)] +#[derive(Debug, IntoBytes, FromBytes)] +struct FwCfgFilesHeader { + count_be: u32, +} + +pub const FILE_NAME_SIZE: usize = 56; + +pub fn create_file_name(name: &str) -> [u8; FILE_NAME_SIZE] { + let mut c_name = [0u8; FILE_NAME_SIZE]; + let c_len = std::cmp::min(FILE_NAME_SIZE - 1, name.len()); + c_name[0..c_len].copy_from_slice(&name.as_bytes()[0..c_len]); + c_name +} + +#[allow(dead_code)] +#[repr(C, packed)] 
+#[derive(Debug, IntoBytes, FromBytes, Clone, Copy)] +struct BootE820Entry { + addr: u64, + size: u64, + type_: u32, +} + +#[repr(C)] +#[derive(Debug, IntoBytes, FromBytes)] +struct FwCfgFile { + size_be: u32, + select_be: u16, + _reserved: u16, + name: [u8; FILE_NAME_SIZE], +} + +impl FwCfg { + pub fn new() -> FwCfg { + const DEFAULT_ITEM: FwCfgContent = FwCfgContent::Slice(&[]); + let mut known_items = [DEFAULT_ITEM; FW_CFG_KNOWN_ITEMS]; + known_items[FW_CFG_SIGNATURE as usize] = FwCfgContent::Slice(&FW_CFG_DMA_SIGNATURE); + known_items[FW_CFG_ID as usize] = FwCfgContent::Slice(&FW_CFG_FEATURE); + let file_buf = Vec::from(FwCfgFilesHeader { count_be: 0 }.as_mut_bytes()); + known_items[FW_CFG_FILE_DIR as usize] = FwCfgContent::Bytes(file_buf); + + FwCfg { + selector: 0, + data_offset: 0, + items: vec![], + known_items, + } + } + + fn file_dir_mut(&mut self) -> &mut Vec { + let FwCfgContent::Bytes(file_buf) = &mut self.known_items[FW_CFG_FILE_DIR as usize] else { + unreachable!("fw_cfg: selector {FW_CFG_FILE_DIR:#x} should be FwCfgContent::Byte!") + }; + file_buf + } + + fn update_count(&mut self) { + let mut header = FwCfgFilesHeader { + count_be: (self.items.len() as u32).to_be(), + }; + self.file_dir_mut()[0..4].copy_from_slice(header.as_mut_bytes()); + } + + pub fn add_item(&mut self, item: FwCfgItem) -> Result<()> { + let index = self.items.len(); + let c_name = create_file_name(&item.name); + let size = item.content.size()?; + let mut cfg_file = FwCfgFile { + size_be: size.to_be(), + select_be: (FW_CFG_FILE_FIRST + index as u16).to_be(), + _reserved: 0, + name: c_name, + }; + self.file_dir_mut() + .extend_from_slice(cfg_file.as_mut_bytes()); + self.items.push(item); + self.update_count(); + Ok(()) + } + + fn read_content(content: &FwCfgContent, offset: u32, data: &mut [u8], size: u32) -> Option { + let start = offset as usize; + let end = start + size as usize; + match content { + FwCfgContent::Bytes(b) => { + if b.len() >= size as usize { + data.copy_from_slice(&b[start..end]); + } + } + FwCfgContent::Slice(s) => { + if s.len() >= size as usize { + data.copy_from_slice(&s[start..end]); + } + } + FwCfgContent::File(o, f) => { + f.read_exact_at(data, o + offset as u64).ok()?; + } + FwCfgContent::U32(n) => { + let bytes = n.to_le_bytes(); + data.copy_from_slice(&bytes[start..end]); + } + }; + Some(size as u8) + } + + fn read_data(&mut self, data: &mut [u8], size: u32) -> u8 { + let ret = if let Some(content) = self.known_items.get(self.selector as usize) { + Self::read_content(content, self.data_offset, data, size) + } else if let Some(item) = self.items.get((self.selector - FW_CFG_FILE_FIRST) as usize) { + Self::read_content(&item.content, self.data_offset, data, size) + } else { + error!("fw_cfg: selector {:#x} does not exist.", self.selector); + None + }; + if let Some(val) = ret { + self.data_offset += size; + val + } else { + 0 + } + } +} + +impl BusDevice for FwCfg { + fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { + let port = offset + PORT_FW_CFG_BASE; + let size = data.len(); + match (port, size) { + (PORT_FW_CFG_SELECTOR, _) => { + error!("fw_cfg: selector register is write-only."); + } + (PORT_FW_CFG_DATA, _) => _ = self.read_data(data, size as u32), + (PORT_FW_CFG_DMA_HI, 4) => { + unimplemented!() + } + (PORT_FW_CFG_DMA_LO, 4) => { + unimplemented!() + } + _ => { + debug!("fw_cfg: read from unknown port {port:#x}: {size:#x} bytes and offset {offset:#x}."); + } + }; + } + + fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option> { + let port = 
offset + PORT_FW_CFG_BASE; + let size = data.size(); + match (port, size) { + (PORT_FW_CFG_SELECTOR, 2) => { + let mut buf = [0u8; 2]; + buf[..size].copy_from_slice(&data[..size]); + #[cfg(target_arch = "x86_64")] + let val = u16::from_le_bytes(buf); + #[cfg(target_arch = "aarch64")] + let val = u16::from_be_bytes(buf); + self.selector = val; + self.data_offset = 0; + } + (PORT_FW_CFG_DATA, 1) => error!("fw_cfg: data register is read-only."), + (PORT_FW_CFG_DMA_HI, 4) => { + unimplemented!() + } + (PORT_FW_CFG_DMA_LO, 4) => { + unimplemented!() + } + _ => debug!( + "fw_cfg: write to unknown port {port:#x}: {size:#x} bytes and offset {offset:#x} ." + ), + }; + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(target_arch = "x86_64")] + const SELECTOR_OFFSET: u64 = 0; + #[cfg(target_arch = "aarch64")] + const SELECTOR_OFFSET: u64 = 8; + #[cfg(target_arch = "x86_64")] + const DATA_OFFSET: u64 = 1; + #[cfg(target_arch = "aarch64")] + const DATA_OFFSET: u64 = 0; + + #[test] + fn test_signature() { + let mut fw_cfg = FwCfg::new(); + + let mut data = vec![0u8]; + + let mut sig_iter = FW_CFG_DMA_SIGNATURE.into_iter(); + fw_cfg.write(0, SELECTOR_OFFSET, &[FW_CFG_SIGNATURE as u8, 0]); + loop { + if let Some(char) = sig_iter.next() { + fw_cfg.read(0, DATA_OFFSET, &mut data); + assert_eq!(data[0], char); + } else { + return; + } + } + } +} diff --git a/devices/src/legacy/mod.rs b/devices/src/legacy/mod.rs index 3f58e5c842..1087d3d27d 100644 --- a/devices/src/legacy/mod.rs +++ b/devices/src/legacy/mod.rs @@ -8,6 +8,8 @@ mod cmos; #[cfg(target_arch = "x86_64")] mod debug_port; +#[cfg(feature = "fw_cfg")] +pub mod fw_cfg; #[cfg(target_arch = "x86_64")] mod fwdebug; #[cfg(target_arch = "aarch64")] @@ -22,6 +24,8 @@ mod uart_pl011; pub use self::cmos::Cmos; #[cfg(target_arch = "x86_64")] pub use self::debug_port::DebugPort; +#[cfg(feature = "fw_cfg")] +pub use self::fw_cfg::FwCfg; #[cfg(target_arch = "x86_64")] pub use self::fwdebug::FwDebugDevice; #[cfg(target_arch = "aarch64")] diff --git a/docs/fw_cfg.md b/docs/fw_cfg.md new file mode 100644 index 0000000000..73f10a7808 --- /dev/null +++ b/docs/fw_cfg.md @@ -0,0 +1,77 @@ +# Firmware Configuration (fw_cfg) Device + +The `fw_cfg` device is a QEMU-compatible device that allows the hypervisor to pass configuration and data to the guest operating system. This is particularly useful for firmware to access information like ACPI tables, kernel images, initramfs, kernel command lines, and other arbitrary data blobs. + +Cloud Hypervisor implements the `fw_cfg` device with DMA-enabled access. + +## Purpose + +The `fw_cfg` device serves as a generic information channel between the VMM and the guest. It can be used to: + +* Load the kernel, initramfs, and kernel command line for direct kernel boot with firmware. +* Provide ACPI tables to the guest firmware or OS. +* Pass custom configuration files or data blobs (e.g., attestation data, SEV-SNP launch secrets) to the guest. +* Supply an E820 memory map to the guest. + +## Enabling `fw_cfg` + +The `fw_cfg` device is enabled via the `fw_cfg` feature flag when building Cloud Hypervisor: + +```bash +cargo build --features fw_cfg +``` + +## Guest Kernel Configuration + +For the guest Linux kernel to recognize and use the `fw_cfg` device via sysfs, the following kernel configuration option must be enabled: + +* `CONFIG_FW_CFG_SYSFS=y` + +This option allows the kernel to expose `fw_cfg` entries under `/sys/firmware/qemu_fw_cfg/by_name/`. 
+ +## Command Line Options + +The `fw_cfg` device is configured using the `--fw-cfg-config` command-line option. + +**Parameters:** +* `e820=on|off`: (Default: `on`) Whether to add an E820 memory map entry to `fw_cfg`. +* `kernel=on|off`: (Default: `on`) Whether to add the kernel image (specified by `--kernel`) to `fw_cfg`. +* `cmdline=on|off`: (Default: `on`) Whether to add the kernel command line (specified by `--cmdline`) to `fw_cfg`. +* `initramfs=on|off`: (Default: `on`) Whether to add the initramfs image (specified by `--initramfs`) to `fw_cfg`. +* `acpi_table=on|off`: (Default: `on`) Whether to add generated ACPI tables to `fw_cfg`. +* `items=[... : ...]`: A list of custom key-value pairs to be exposed via `fw_cfg`. + * `name=`: The path under which the item will appear in the guest's sysfs (e.g., `opt/org.example/my-data`). + * `file=`: The path to the file on the host whose content will be provided to the guest for this item. + +**Example Usage:** + +1. **Direct kernel boot with custom `fw_cfg` entries:** + + ```bash + cloud-hypervisor \ + --kernel /path/to/vmlinux \ + --cmdline "console=hvc0 root=/dev/vda1" \ + --disk path=/path/to/rootfs.img \ + --fw-cfg-config initramfs=off,items=[name=opt/org.mycorp/setup_info,file=/tmp/guest_setup.txt] \ + ... + ``` + In the guest, `/tmp/guest_setup.txt` from the host will be accessible at `/sys/firmware/qemu_fw_cfg/by_name/opt/org.mycorp/setup_info/raw`. + +2. **Disabling `fw_cfg` explicitly:** + + ```bash + cloud-hypervisor \ + --fw-cfg-config disable \ + ... + ``` + +## Accessing `fw_cfg` Items in the Guest + +If `CONFIG_FW_CFG_SYSFS` is enabled in the guest kernel, items added to `fw_cfg` can be accessed via sysfs. + +For example, an item added with `name=opt/org.example/my-data` will be available at: +`/sys/firmware/qemu_fw_cfg/by_name/opt/org.example/my-data/raw` + +The `raw` file contains the binary content of the host file provided. + +Standard items like kernel, initramfs, cmdline, and ACPI tables also have predefined names (e.g., `etc/kernel`, `etc/cmdline`) if they are enabled to be passed via `fw_cfg`. 
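As a quick sanity check inside the guest, the raw blob of any item can simply be read from sysfs (a plain `cat` of the same path works as well). The snippet below is a minimal sketch that assumes the custom item name `opt/org.example/my-data` from the example above and a guest kernel built with `CONFIG_FW_CFG_SYSFS=y`:

```rust
// Minimal guest-side sketch: read the raw contents of a custom fw_cfg item.
// The item name below is the hypothetical one used earlier in this document.
use std::fs;

fn main() -> std::io::Result<()> {
    let raw = fs::read("/sys/firmware/qemu_fw_cfg/by_name/opt/org.example/my-data/raw")?;
    println!("fw_cfg item is {} bytes", raw.len());
    Ok(())
}
```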
diff --git a/src/main.rs b/src/main.rs index 56b48d59b5..bf0d9089d3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -853,6 +853,8 @@ fn main() { compile_error!("Feature 'tdx' and 'sev_snp' are mutually exclusive."); #[cfg(all(feature = "sev_snp", not(target_arch = "x86_64")))] compile_error!("Feature 'sev_snp' needs target 'x86_64'"); + #[cfg(all(feature = "fw_cfg", target_arch = "riscv64"))] + compile_error!("Feature 'fw_cfg' needs targets 'x86_64' or 'aarch64'"); #[cfg(feature = "dhat-heap")] let _profiler = dhat::Profiler::new_heap(); diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index bceba9bdd2..5567a58579 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -8,6 +8,7 @@ version = "0.1.0" dbus_api = ["blocking", "futures", "zbus"] default = [] dhat-heap = ["dhat"] # For heap profiling +fw_cfg = ["devices/fw_cfg"] guest_debug = ["gdbstub", "gdbstub_arch", "kvm"] igvm = ["dep:igvm", "hex", "igvm_defs", "mshv-bindings", "range_map_vec"] io_uring = ["block/io_uring"] diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 2df6f1d585..afff343354 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -57,6 +57,11 @@ use devices::ioapic; use devices::legacy::Pl011; #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))] use devices::legacy::Serial; +#[cfg(feature = "fw_cfg")] +use devices::legacy::{ + fw_cfg::{PORT_FW_CFG_BASE, PORT_FW_CFG_WIDTH}, + FwCfg, +}; #[cfg(feature = "pvmemcontrol")] use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice}; use devices::{interrupt_controller, AcpiNotificationFlags}; @@ -644,6 +649,11 @@ pub enum DeviceManagerError { /// Cannot lock images of all block devices. #[error("Cannot lock images of all block devices")] DiskLockError(#[source] virtio_devices::block::Error), + + #[cfg(feature = "fw_cfg")] + /// Error adding fw_cfg to bus. 
+ #[error("Error adding fw_cfg to bus")] + ErrorAddingFwCfgToBus(#[source] vm_device::BusError), } pub type DeviceManagerResult = result::Result; @@ -1070,6 +1080,9 @@ pub struct DeviceManager { rate_limit_groups: HashMap>, mmio_regions: Arc>>, + + #[cfg(feature = "fw_cfg")] + fw_cfg: Option>>, } fn create_mmio_allocators( @@ -1334,6 +1347,8 @@ impl DeviceManager { snapshot, rate_limit_groups, mmio_regions: Arc::new(Mutex::new(Vec::new())), + #[cfg(feature = "fw_cfg")] + fw_cfg: None, }; let device_manager = Arc::new(Mutex::new(device_manager)); @@ -1460,6 +1475,32 @@ impl DeviceManager { Ok(()) } + #[cfg(feature = "fw_cfg")] + pub fn create_fw_cfg_device(&mut self) -> Result<(), DeviceManagerError> { + let fw_cfg = Arc::new(Mutex::new(devices::legacy::FwCfg::new())); + + self.fw_cfg = Some(fw_cfg.clone()); + + self.bus_devices + .push(Arc::clone(&fw_cfg) as Arc); + + #[cfg(target_arch = "x86_64")] + self.address_manager + .io_bus + .insert(fw_cfg, PORT_FW_CFG_BASE, PORT_FW_CFG_WIDTH) + .map_err(DeviceManagerError::ErrorAddingFwCfgToBus)?; + + // default address for fw_cfg on arm via mmio + // https://github.com/torvalds/linux/blob/master/drivers/firmware/qemu_fw_cfg.c#L27 + #[cfg(target_arch = "aarch64")] + self.address_manager + .mmio_bus + .insert(fw_cfg.clone(), PORT_FW_CFG_BASE, PORT_FW_CFG_WIDTH) + .map_err(DeviceManagerError::ErrorAddingFwCfgToBus)?; + + Ok(()) + } + fn state(&self) -> DeviceManagerState { DeviceManagerState { device_tree: self.device_tree.lock().unwrap().clone(), @@ -4181,6 +4222,11 @@ impl DeviceManager { &self.address_manager.mmio_bus } + #[cfg(feature = "fw_cfg")] + pub fn fw_cfg(&self) -> Option<&Arc>> { + self.fw_cfg.as_ref() + } + pub fn allocator(&self) -> &Arc> { &self.address_manager.allocator } diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 2cf1cb6e95..ef99fb31a6 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -405,6 +405,8 @@ pub fn feature_list() -> Vec { "dbus_api".to_string(), #[cfg(feature = "dhat-heap")] "dhat-heap".to_string(), + #[cfg(feature = "fw_cfg")] + "fw_cfg".to_string(), #[cfg(feature = "guest_debug")] "guest_debug".to_string(), #[cfg(feature = "igvm")] diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index e8354c5cd6..683aa3df9b 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -720,6 +720,13 @@ impl Vm { vm.sev_snp_init().map_err(Error::InitializeSevSnpVm)?; } + #[cfg(feature = "fw_cfg")] + device_manager + .lock() + .unwrap() + .create_fw_cfg_device() + .map_err(Error::DeviceManager)?; + #[cfg(feature = "tdx")] let kernel = config .lock() From 623fadfa9dd26d1b619f900f673b21ff2d112d55 Mon Sep 17 00:00:00 2001 From: Alex Orozco Date: Mon, 24 Mar 2025 19:50:46 +0000 Subject: [PATCH 078/294] devices: Add kernel cmdline, kernel, and initramfs to fw_cfg device The kernel and initramfs are passed to the fw_cfg device as file references. The cmdline is passed directly. 
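For illustration, the intended call pattern for these new helpers looks roughly as follows; the file paths and the free-standing wrapper are placeholders, and the real wiring is done by `Vm::populate_fw_cfg` in this patch's `vmm/src/vm.rs` changes.

```rust
// Rough usage sketch (paths are placeholders, not real defaults).
use std::ffi::CString;
use std::fs::File;

use devices::legacy::FwCfg;

fn populate(fw_cfg: &mut FwCfg) -> std::io::Result<()> {
    let kernel = File::open("/path/to/bzImage")?;      // placeholder path
    let initramfs = File::open("/path/to/initramfs")?; // placeholder path
    fw_cfg.add_kernel_data(&kernel)?;       // fills the kernel/setup known items
    fw_cfg.add_initramfs_data(&initramfs)?; // fills the initrd known items
    fw_cfg.add_kernel_cmdline(CString::new("console=hvc0 root=/dev/vda1").unwrap());
    Ok(())
}
```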
Signed-off-by: Alex Orozco --- Cargo.lock | 4 +- Cargo.toml | 3 +- devices/Cargo.toml | 7 ++- devices/src/legacy/fw_cfg.rs | 107 ++++++++++++++++++++++++++++++++++- fuzz/Cargo.toml | 7 ++- vmm/src/vm.rs | 81 ++++++++++++++++++++++++++ 6 files changed, 203 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f26735d3cb..e4f4454ece 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -531,6 +531,7 @@ dependencies = [ "event_monitor", "hypervisor", "libc", + "linux-loader", "log", "num_enum", "pci", @@ -1143,8 +1144,7 @@ dependencies = [ [[package]] name = "linux-loader" version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "870c3814345f050991f99869417779f6062542bcf4ed81db7a1b926ad1306638" +source = "git+https://github.com/rust-vmm/linux-loader?branch=main#d5f39c09d59c8f50d5313b78ce4de511b12d1848" dependencies = [ "vm-memory", ] diff --git a/Cargo.toml b/Cargo.toml index 3fdfef690d..89f994aafe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -107,7 +107,8 @@ members = [ acpi_tables = { git = "https://github.com/rust-vmm/acpi_tables", branch = "main" } kvm-bindings = "0.12.0" kvm-ioctls = "0.22.0" -linux-loader = "0.13.0" +# TODO: update to 0.13.1+ +linux-loader = { git = "https://github.com/rust-vmm/linux-loader", branch = "main" } mshv-bindings = "0.5.2" mshv-ioctls = "0.5.2" seccompiler = "0.5.0" diff --git a/devices/Cargo.toml b/devices/Cargo.toml index dc025948cc..c35f58b880 100644 --- a/devices/Cargo.toml +++ b/devices/Cargo.toml @@ -13,6 +13,11 @@ byteorder = { workspace = true } event_monitor = { path = "../event_monitor" } hypervisor = { path = "../hypervisor" } libc = { workspace = true } +linux-loader = { workspace = true, features = [ + "bzimage", + "elf", + "pe", +], optional = true } log = { workspace = true } num_enum = "0.7.2" pci = { path = "../pci" } @@ -38,6 +43,6 @@ arch = { path = "../arch" } [features] default = [] -fw_cfg = ["zerocopy"] +fw_cfg = ["linux-loader", "zerocopy"] kvm = ["arch/kvm"] pvmemcontrol = [] diff --git a/devices/src/legacy/fw_cfg.rs b/devices/src/legacy/fw_cfg.rs index 8810c14353..87e4e66a30 100644 --- a/devices/src/legacy/fw_cfg.rs +++ b/devices/src/legacy/fw_cfg.rs @@ -14,12 +14,17 @@ use std::{ fs::File, io::Result, - mem::size_of_val, + mem::{size_of, size_of_val}, os::unix::fs::FileExt, sync::{Arc, Barrier}, }; +#[cfg(target_arch = "x86_64")] +use linux_loader::bootparam::boot_params; +#[cfg(target_arch = "aarch64")] +use linux_loader::loader::pe::arm64_image_header as boot_params; use vm_device::BusDevice; +use vm_memory::ByteValued; use vmm_sys_util::sock_ctrl_msg::IntoIovec; use zerocopy::{FromBytes, IntoBytes}; @@ -50,6 +55,14 @@ pub const PORT_FW_CFG_WIDTH: u64 = 0x10; const FW_CFG_SIGNATURE: u16 = 0x00; const FW_CFG_ID: u16 = 0x01; +const FW_CFG_KERNEL_SIZE: u16 = 0x08; +const FW_CFG_INITRD_SIZE: u16 = 0x0b; +const FW_CFG_KERNEL_DATA: u16 = 0x11; +const FW_CFG_INITRD_DATA: u16 = 0x12; +const FW_CFG_CMDLINE_SIZE: u16 = 0x14; +const FW_CFG_CMDLINE_DATA: u16 = 0x15; +const FW_CFG_SETUP_SIZE: u16 = 0x17; +const FW_CFG_SETUP_DATA: u16 = 0x18; const FW_CFG_FILE_DIR: u16 = 0x19; const FW_CFG_KNOWN_ITEMS: usize = 0x20; @@ -183,6 +196,46 @@ impl FwCfg { Ok(()) } + pub fn add_kernel_data(&mut self, file: &File) -> Result<()> { + let mut buffer = vec![0u8; size_of::()]; + file.read_exact_at(&mut buffer, 0)?; + let bp = boot_params::from_mut_slice(&mut buffer).unwrap(); + #[cfg(target_arch = "x86_64")] + { + // must set to 4 for backwards compatibility + // 
https://docs.kernel.org/arch/x86/boot.html#the-real-mode-kernel-header + if bp.hdr.setup_sects == 0 { + bp.hdr.setup_sects = 4; + } + // wildcard boot loader type + bp.hdr.type_of_loader = 0xff; + } + #[cfg(target_arch = "aarch64")] + let kernel_start = bp.text_offset; + #[cfg(target_arch = "x86_64")] + let kernel_start = (bp.hdr.setup_sects as usize + 1) * 512; + self.known_items[FW_CFG_SETUP_SIZE as usize] = FwCfgContent::U32(buffer.len() as u32); + self.known_items[FW_CFG_SETUP_DATA as usize] = FwCfgContent::Bytes(buffer); + self.known_items[FW_CFG_KERNEL_SIZE as usize] = + FwCfgContent::U32(file.metadata()?.len() as u32 - kernel_start as u32); + self.known_items[FW_CFG_KERNEL_DATA as usize] = + FwCfgContent::File(kernel_start as u64, file.try_clone()?); + Ok(()) + } + + pub fn add_kernel_cmdline(&mut self, s: std::ffi::CString) { + let bytes = s.into_bytes_with_nul(); + self.known_items[FW_CFG_CMDLINE_SIZE as usize] = FwCfgContent::U32(bytes.len() as u32); + self.known_items[FW_CFG_CMDLINE_DATA as usize] = FwCfgContent::Bytes(bytes); + } + + pub fn add_initramfs_data(&mut self, file: &File) -> Result<()> { + let initramfs_size = file.metadata()?.len(); + self.known_items[FW_CFG_INITRD_SIZE as usize] = FwCfgContent::U32(initramfs_size as _); + self.known_items[FW_CFG_INITRD_DATA as usize] = FwCfgContent::File(0, file.try_clone()?); + Ok(()) + } + fn read_content(content: &FwCfgContent, offset: u32, data: &mut [u8], size: u32) -> Option { let start = offset as usize; let end = start + size as usize; @@ -278,6 +331,11 @@ impl BusDevice for FwCfg { #[cfg(test)] mod tests { + use std::ffi::CString; + use std::io::Write; + + use vmm_sys_util::tempfile::TempFile; + use super::*; #[cfg(target_arch = "x86_64")] @@ -306,4 +364,51 @@ mod tests { } } } + #[test] + fn test_kernel_cmdline() { + let mut fw_cfg = FwCfg::new(); + + let cmdline = *b"cmdline\0"; + + fw_cfg.add_kernel_cmdline(CString::from_vec_with_nul(cmdline.to_vec()).unwrap()); + + let mut data = vec![0u8]; + + let mut cmdline_iter = cmdline.into_iter(); + fw_cfg.write(0, SELECTOR_OFFSET, &[FW_CFG_CMDLINE_DATA as u8, 0]); + loop { + if let Some(char) = cmdline_iter.next() { + fw_cfg.read(0, DATA_OFFSET, &mut data); + assert_eq!(data[0], char); + } else { + return; + } + } + } + + #[test] + fn test_initram_fs() { + let mut fw_cfg = FwCfg::new(); + + let temp = TempFile::new().unwrap(); + let mut temp_file = temp.as_file(); + + let initram_content = b"this is the initramfs"; + let written = temp_file.write(initram_content); + assert_eq!(written.unwrap(), 21); + let _ = fw_cfg.add_initramfs_data(temp_file); + + let mut data = vec![0u8]; + + let mut initram_iter = (*initram_content).into_iter(); + fw_cfg.write(0, SELECTOR_OFFSET, &[FW_CFG_INITRD_DATA as u8, 0]); + loop { + if let Some(char) = initram_iter.next() { + fw_cfg.read(0, DATA_OFFSET, &mut data); + assert_eq!(data[0], char); + } else { + return; + } + } + } } diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 5d6ac0a29e..8a7e1d4849 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -22,7 +22,12 @@ epoll = "4.3.3" hypervisor = { path = "../hypervisor", features = ["mshv_emulator"] } libc = "0.2.155" libfuzzer-sys = "0.4.7" -linux-loader = { version = "0.13.0", features = ["bzimage", "elf", "pe"] } +# TODO: update to 0.13.1+ +linux-loader = { git = "https://github.com/rust-vmm/linux-loader", branch = "main", features = [ + "bzimage", + "elf", + "pe", +] } micro_http = { git = "https://github.com/firecracker-microvm/micro-http", branch = "main" } mshv-bindings = "0.5.2" net_util 
= { path = "../net_util" } diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 683aa3df9b..f7fafeb6f7 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -339,6 +339,18 @@ pub enum Error { #[error("Error locking disk images: Another instance likely holds a lock")] LockingError(#[source] DeviceManagerError), + + #[cfg(feature = "fw_cfg")] + #[error("Fw Cfg missing kernel")] + MissingFwCfgKernelFile(#[source] io::Error), + + #[cfg(feature = "fw_cfg")] + #[error("Fw Cfg missing initramfs")] + MissingFwCfgInitramfs(#[source] io::Error), + + #[cfg(feature = "fw_cfg")] + #[error("Fw Cfg missing kernel cmdline")] + MissingFwCfgCmdline, } pub type Result = result::Result; @@ -784,6 +796,72 @@ impl Vm { }) } + #[cfg(feature = "fw_cfg")] + fn populate_fw_cfg( + device_manager: &Arc>, + config: &Arc>, + ) -> Result<()> { + let kernel = config + .lock() + .unwrap() + .payload + .as_ref() + .map(|p| p.kernel.as_ref().map(File::open)) + .unwrap_or_default() + .transpose() + .map_err(Error::MissingFwCfgKernelFile)?; + if let Some(kernel_file) = kernel { + device_manager + .lock() + .unwrap() + .fw_cfg() + .expect("fw_cfg device must be present") + .lock() + .unwrap() + .add_kernel_data(&kernel_file) + .map_err(Error::MissingFwCfgKernelFile)? + } + let cmdline = Vm::generate_cmdline( + config.lock().unwrap().payload.as_ref().unwrap(), + #[cfg(target_arch = "aarch64")] + device_manager, + ) + .map_err(|_| Error::MissingFwCfgCmdline)? + .as_cstring() + .map_err(|_| Error::MissingFwCfgCmdline)?; + device_manager + .lock() + .unwrap() + .fw_cfg() + .expect("fw_cfg device must be present") + .lock() + .unwrap() + .add_kernel_cmdline(cmdline); + let initramfs = config + .lock() + .unwrap() + .payload + .as_ref() + .map(|p| p.initramfs.as_ref().map(File::open)) + .unwrap_or_default() + .transpose() + .map_err(Error::MissingFwCfgInitramfs)?; + // We measure the initramfs when running Oak Containers in SNP mode (initramfs = Stage1) + // o/w use Stage0 to launch cloud disk images + if let Some(initramfs_file) = initramfs { + device_manager + .lock() + .unwrap() + .fw_cfg() + .expect("fw_cfg device must be present") + .lock() + .unwrap() + .add_initramfs_data(&initramfs_file) + .map_err(Error::MissingFwCfgInitramfs)?; + } + Ok(()) + } + fn create_numa_nodes( configs: Option>, memory_manager: &Arc>, @@ -2272,6 +2350,9 @@ impl Vm { }; current_state.valid_transition(new_state)?; + #[cfg(feature = "fw_cfg")] + Self::populate_fw_cfg(&self.device_manager, &self.config)?; + // Do earlier to parallelise with loading kernel #[cfg(target_arch = "x86_64")] cfg_if::cfg_if! { From f0b69d56d05ad4d96f1c8669f8483f5bb8d5a05e Mon Sep 17 00:00:00 2001 From: Alex Orozco Date: Fri, 18 Apr 2025 16:20:58 +0000 Subject: [PATCH 079/294] devices: Add e820/memory_map to fw_cfg device We build the memory map in the fw_cfg device based on the memory size. 
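For reference, the map ends up as a flat array of e820 entries in the "etc/e820" fw_cfg file. Below is a minimal, self-contained sketch of that packing, assuming the standard 20-byte e820 entry layout (addr: u64, size: u64, type: u32, all little-endian); the patch itself derives the regions from the arch layout constants and serializes BootE820Entry with zerocopy.

    const E820_RAM: u32 = 1;
    const E820_RESERVED: u32 = 2;

    // Pack (addr, size, type) triples into the byte layout the guest expects
    // for e820 entries: addr and size as little-endian u64, type as u32.
    fn pack_e820(regions: &[(u64, u64, u32)]) -> Vec<u8> {
        let mut bytes = Vec::with_capacity(regions.len() * 20);
        for &(addr, size, type_) in regions {
            bytes.extend_from_slice(&addr.to_le_bytes());
            bytes.extend_from_slice(&size.to_le_bytes());
            bytes.extend_from_slice(&type_.to_le_bytes());
        }
        bytes
    }

    fn main() {
        // Hypothetical example regions: low RAM followed by a reserved hole.
        let map = pack_e820(&[
            (0x0, 0x8_0000, E820_RAM),
            (0xc000_0000, 0x4000_0000, E820_RESERVED),
        ]);
        assert_eq!(map.len(), 2 * 20);
    }
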
Signed-off-by: Alex Orozco --- devices/src/legacy/fw_cfg.rs | 77 ++++++++++++++++++++++++++++++++++++ vmm/src/vm.rs | 13 ++++++ 2 files changed, 90 insertions(+) diff --git a/devices/src/legacy/fw_cfg.rs b/devices/src/legacy/fw_cfg.rs index 87e4e66a30..ac8f20e858 100644 --- a/devices/src/legacy/fw_cfg.rs +++ b/devices/src/legacy/fw_cfg.rs @@ -19,15 +19,35 @@ use std::{ sync::{Arc, Barrier}, }; +#[cfg(target_arch = "aarch64")] +use arch::aarch64::layout::{ + MEM_32BIT_DEVICES_START, MEM_32BIT_RESERVED_START, RAM_64BIT_START, RAM_START as HIGH_RAM_START, +}; +#[cfg(target_arch = "x86_64")] +use arch::layout::{ + EBDA_START, HIGH_RAM_START, MEM_32BIT_DEVICES_SIZE, MEM_32BIT_DEVICES_START, + MEM_32BIT_RESERVED_START, PCI_MMCONFIG_SIZE, PCI_MMCONFIG_START, RAM_64BIT_START, +}; +use arch::RegionType; #[cfg(target_arch = "x86_64")] use linux_loader::bootparam::boot_params; #[cfg(target_arch = "aarch64")] use linux_loader::loader::pe::arm64_image_header as boot_params; use vm_device::BusDevice; use vm_memory::ByteValued; +#[cfg(target_arch = "x86_64")] +use vm_memory::GuestAddress; use vmm_sys_util::sock_ctrl_msg::IntoIovec; use zerocopy::{FromBytes, IntoBytes}; +#[cfg(target_arch = "x86_64")] +// https://github.com/project-oak/oak/tree/main/stage0_bin#memory-layout +const STAGE0_START_ADDRESS: GuestAddress = GuestAddress(0xfffe_0000); +#[cfg(target_arch = "x86_64")] +const STAGE0_SIZE: usize = 0x2_0000; +const E820_RAM: u32 = 1; +const E820_RESERVED: u32 = 2; + #[cfg(target_arch = "x86_64")] const PORT_FW_CFG_SELECTOR: u64 = 0x510; #[cfg(target_arch = "x86_64")] @@ -165,6 +185,63 @@ impl FwCfg { } } + pub fn add_e820(&mut self, mem_size: usize) -> Result<()> { + #[cfg(target_arch = "x86_64")] + let mut mem_regions = vec![ + (GuestAddress(0), EBDA_START.0 as usize, RegionType::Ram), + ( + MEM_32BIT_DEVICES_START, + MEM_32BIT_DEVICES_SIZE as usize, + RegionType::Reserved, + ), + ( + PCI_MMCONFIG_START, + PCI_MMCONFIG_SIZE as usize, + RegionType::Reserved, + ), + (STAGE0_START_ADDRESS, STAGE0_SIZE, RegionType::Reserved), + ]; + #[cfg(target_arch = "aarch64")] + let mut mem_regions = arch::aarch64::arch_memory_regions(); + if mem_size < MEM_32BIT_DEVICES_START.0 as usize { + mem_regions.push(( + HIGH_RAM_START, + mem_size - HIGH_RAM_START.0 as usize, + RegionType::Ram, + )); + } else { + mem_regions.push(( + HIGH_RAM_START, + MEM_32BIT_RESERVED_START.0 as usize - HIGH_RAM_START.0 as usize, + RegionType::Ram, + )); + mem_regions.push(( + RAM_64BIT_START, + mem_size - (MEM_32BIT_DEVICES_START.0 as usize), + RegionType::Ram, + )); + } + let mut bytes = vec![]; + for (addr, size, region) in mem_regions.iter() { + let type_ = match region { + RegionType::Ram => E820_RAM, + RegionType::Reserved => E820_RESERVED, + RegionType::SubRegion => continue, + }; + let mut entry = BootE820Entry { + addr: addr.0, + size: *size as u64, + type_, + }; + bytes.extend_from_slice(entry.as_mut_bytes()); + } + let item = FwCfgItem { + name: "etc/e820".to_owned(), + content: FwCfgContent::Bytes(bytes), + }; + self.add_item(item) + } + fn file_dir_mut(&mut self) -> &mut Vec { let FwCfgContent::Bytes(file_buf) = &mut self.known_items[FW_CFG_FILE_DIR as usize] else { unreachable!("fw_cfg: selector {FW_CFG_FILE_DIR:#x} should be FwCfgContent::Byte!") diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index f7fafeb6f7..c527741b4d 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -351,6 +351,10 @@ pub enum Error { #[cfg(feature = "fw_cfg")] #[error("Fw Cfg missing kernel cmdline")] MissingFwCfgCmdline, + + #[cfg(feature = "fw_cfg")] + 
#[error("Error creating e820 map")] + CreatingE820Map(#[source] io::Error), } pub type Result = result::Result; @@ -801,6 +805,15 @@ impl Vm { device_manager: &Arc>, config: &Arc>, ) -> Result<()> { + device_manager + .lock() + .unwrap() + .fw_cfg() + .expect("fw_cfg device must be present") + .lock() + .unwrap() + .add_e820(config.lock().unwrap().memory.size as usize) + .map_err(Error::CreatingE820Map)?; let kernel = config .lock() .unwrap() From 1f51e4525bbfcc0d618daf75c26cc927a46cc5ab Mon Sep 17 00:00:00 2001 From: Alex Orozco Date: Mon, 24 Mar 2025 20:12:08 +0000 Subject: [PATCH 080/294] devices: Add acpi tables to fw_cfg The acpi tables are created in the same place the acpi tables would be created for the regular bootflow, except here we add them to the fw_cfg device to be measured by the fw and then the fw will put the acpi tables into memory. Signed-off-by: Alex Orozco --- devices/src/legacy/fw_cfg.rs | 198 ++++++++++++++++++++++++++++++++++- vmm/src/acpi.rs | 63 ++++++++++- vmm/src/vm.rs | 17 ++- 3 files changed, 274 insertions(+), 4 deletions(-) diff --git a/devices/src/legacy/fw_cfg.rs b/devices/src/legacy/fw_cfg.rs index ac8f20e858..c12c50d5da 100644 --- a/devices/src/legacy/fw_cfg.rs +++ b/devices/src/legacy/fw_cfg.rs @@ -14,11 +14,12 @@ use std::{ fs::File, io::Result, - mem::{size_of, size_of_val}, + mem::offset_of, os::unix::fs::FileExt, sync::{Arc, Barrier}, }; +use acpi_tables::rsdp::Rsdp; #[cfg(target_arch = "aarch64")] use arch::aarch64::layout::{ MEM_32BIT_DEVICES_START, MEM_32BIT_RESERVED_START, RAM_64BIT_START, RAM_START as HIGH_RAM_START, @@ -38,7 +39,7 @@ use vm_memory::ByteValued; #[cfg(target_arch = "x86_64")] use vm_memory::GuestAddress; use vmm_sys_util::sock_ctrl_msg::IntoIovec; -use zerocopy::{FromBytes, IntoBytes}; +use zerocopy::{FromBytes, Immutable, IntoBytes}; #[cfg(target_arch = "x86_64")] // https://github.com/project-oak/oak/tree/main/stage0_bin#memory-layout @@ -94,6 +95,17 @@ const FW_CFG_F_RESERVED: u8 = 1 << 0; const FW_CFG_F_DMA: u8 = 1 << 1; pub const FW_CFG_FEATURE: [u8; 4] = [FW_CFG_F_RESERVED | FW_CFG_F_DMA, 0, 0, 0]; +const COMMAND_ALLOCATE: u32 = 0x1; +const COMMAND_ADD_POINTER: u32 = 0x2; +const COMMAND_ADD_CHECKSUM: u32 = 0x3; + +const ALLOC_ZONE_HIGH: u8 = 0x1; +const ALLOC_ZONE_FSEG: u8 = 0x2; + +const FW_CFG_FILENAME_TABLE_LOADER: &str = "etc/table-loader"; +const FW_CFG_FILENAME_RSDP: &str = "acpi/rsdp"; +const FW_CFG_FILENAME_ACPI_TABLES: &str = "acpi/tables"; + #[derive(Debug)] pub enum FwCfgContent { Bytes(Vec), @@ -168,6 +180,169 @@ struct FwCfgFile { name: [u8; FILE_NAME_SIZE], } +#[repr(C, align(4))] +#[derive(Debug, IntoBytes, Immutable)] +struct Allocate { + command: u32, + file: [u8; FILE_NAME_SIZE], + align: u32, + zone: u8, + _pad: [u8; 63], +} + +#[repr(C, align(4))] +#[derive(Debug, IntoBytes, Immutable)] +struct AddPointer { + command: u32, + dst: [u8; FILE_NAME_SIZE], + src: [u8; FILE_NAME_SIZE], + offset: u32, + size: u8, + _pad: [u8; 7], +} + +#[repr(C, align(4))] +#[derive(Debug, IntoBytes, Immutable)] +struct AddChecksum { + command: u32, + file: [u8; FILE_NAME_SIZE], + offset: u32, + start: u32, + len: u32, + _pad: [u8; 56], +} + +fn create_intra_pointer(name: &str, offset: usize, size: u8) -> AddPointer { + AddPointer { + command: COMMAND_ADD_POINTER, + dst: create_file_name(name), + src: create_file_name(name), + offset: offset as u32, + size, + _pad: [0; 7], + } +} + +fn create_acpi_table_checksum(offset: usize, len: usize) -> AddChecksum { + AddChecksum { + command: COMMAND_ADD_CHECKSUM, + file: 
create_file_name(FW_CFG_FILENAME_ACPI_TABLES), + offset: (offset + offset_of!(AcpiTableHeader, checksum)) as u32, + start: offset as u32, + len: len as u32, + _pad: [0; 56], + } +} + +#[repr(C, align(4))] +#[derive(Debug, Clone, Default, FromBytes, IntoBytes)] +struct AcpiTableHeader { + signature: [u8; 4], + length: u32, + revision: u8, + checksum: u8, + oem_id: [u8; 6], + oem_table_id: [u8; 8], + oem_revision: u32, + asl_compiler_id: [u8; 4], + asl_compiler_revision: u32, +} + +struct AcpiTable { + rsdp: Rsdp, + tables: Vec, + table_pointers: Vec, + table_checksums: Vec<(usize, usize)>, +} + +impl AcpiTable { + fn pointers(&self) -> &[usize] { + &self.table_pointers + } + + fn checksums(&self) -> &[(usize, usize)] { + &self.table_checksums + } + + fn take(self) -> (Rsdp, Vec) { + (self.rsdp, self.tables) + } +} + +// Creates fw_cfg items used by firmware to load and verify Acpi tables +// https://github.com/qemu/qemu/blob/master/hw/acpi/bios-linker-loader.c +fn create_acpi_loader(acpi_table: AcpiTable) -> [FwCfgItem; 3] { + let mut table_loader_bytes: Vec = Vec::new(); + let allocate_rsdp = Allocate { + command: COMMAND_ALLOCATE, + file: create_file_name(FW_CFG_FILENAME_RSDP), + align: 4, + zone: ALLOC_ZONE_FSEG, + _pad: [0; 63], + }; + table_loader_bytes.extend(allocate_rsdp.as_bytes()); + + let allocate_tables = Allocate { + command: COMMAND_ALLOCATE, + file: create_file_name(FW_CFG_FILENAME_ACPI_TABLES), + align: 4, + zone: ALLOC_ZONE_HIGH, + _pad: [0; 63], + }; + table_loader_bytes.extend(allocate_tables.as_bytes()); + + for pointer_offset in acpi_table.pointers().iter() { + let pointer = create_intra_pointer(FW_CFG_FILENAME_ACPI_TABLES, *pointer_offset, 8); + table_loader_bytes.extend(pointer.as_bytes()); + } + for (offset, len) in acpi_table.checksums().iter() { + let checksum = create_acpi_table_checksum(*offset, *len); + table_loader_bytes.extend(checksum.as_bytes()); + } + let pointer_rsdp_to_xsdt = AddPointer { + command: COMMAND_ADD_POINTER, + dst: create_file_name(FW_CFG_FILENAME_RSDP), + src: create_file_name(FW_CFG_FILENAME_ACPI_TABLES), + offset: offset_of!(Rsdp, xsdt_addr) as u32, + size: 8, + _pad: [0; 7], + }; + table_loader_bytes.extend(pointer_rsdp_to_xsdt.as_bytes()); + let checksum_rsdp = AddChecksum { + command: COMMAND_ADD_CHECKSUM, + file: create_file_name(FW_CFG_FILENAME_RSDP), + offset: offset_of!(Rsdp, checksum) as u32, + start: 0, + len: offset_of!(Rsdp, length) as u32, + _pad: [0; 56], + }; + let checksum_rsdp_ext = AddChecksum { + command: COMMAND_ADD_CHECKSUM, + file: create_file_name(FW_CFG_FILENAME_RSDP), + offset: offset_of!(Rsdp, extended_checksum) as u32, + start: 0, + len: size_of::() as u32, + _pad: [0; 56], + }; + table_loader_bytes.extend(checksum_rsdp.as_bytes()); + table_loader_bytes.extend(checksum_rsdp_ext.as_bytes()); + + let table_loader = FwCfgItem { + name: FW_CFG_FILENAME_TABLE_LOADER.to_owned(), + content: FwCfgContent::Bytes(table_loader_bytes), + }; + let (rsdp, tables) = acpi_table.take(); + let acpi_rsdp = FwCfgItem { + name: FW_CFG_FILENAME_RSDP.to_owned(), + content: FwCfgContent::Bytes(rsdp.as_bytes().to_owned()), + }; + let apci_tables = FwCfgItem { + name: FW_CFG_FILENAME_ACPI_TABLES.to_owned(), + content: FwCfgContent::Bytes(tables), + }; + [table_loader, acpi_rsdp, apci_tables] +} + impl FwCfg { pub fn new() -> FwCfg { const DEFAULT_ITEM: FwCfgContent = FwCfgContent::Slice(&[]); @@ -306,6 +481,25 @@ impl FwCfg { self.known_items[FW_CFG_CMDLINE_DATA as usize] = FwCfgContent::Bytes(bytes); } + pub fn add_acpi( + &mut self, + 
rsdp: Rsdp, + tables: Vec, + table_checksums: Vec<(usize, usize)>, + table_pointers: Vec, + ) -> Result<()> { + let acpi_table = AcpiTable { + rsdp, + tables, + table_checksums, + table_pointers, + }; + let [table_loader, acpi_rsdp, apci_tables] = create_acpi_loader(acpi_table); + self.add_item(table_loader)?; + self.add_item(acpi_rsdp)?; + self.add_item(apci_tables) + } + pub fn add_initramfs_data(&mut self, file: &File) -> Result<()> { let initramfs_size = file.metadata()?.len(); self.known_items[FW_CFG_INITRD_SIZE as usize] = FwCfgContent::U32(initramfs_size as _); diff --git a/vmm/src/acpi.rs b/vmm/src/acpi.rs index 2185ee8b32..8dc2c3276c 100644 --- a/vmm/src/acpi.rs +++ b/vmm/src/acpi.rs @@ -192,6 +192,8 @@ pub fn create_dsdt_table( dsdt } +const FACP_DSDT_OFFSET: usize = 140; + fn create_facp_table(dsdt_offset: GuestAddress, device_manager: &Arc>) -> Sdt { trace_scoped!("create_facp_table"); @@ -241,7 +243,7 @@ fn create_facp_table(dsdt_offset: GuestAddress, device_manager: &Arc>, + cpu_manager: &Arc>, + memory_manager: &Arc>, + numa_nodes: &NumaNodes, + tpm_enabled: bool, +) -> Result<(), crate::vm::Error> { + let dsdt_offset = GuestAddress(0); + let (rsdp, table_bytes, xsdt_table_pointers) = create_acpi_tables_internal( + dsdt_offset, + device_manager, + cpu_manager, + memory_manager, + numa_nodes, + tpm_enabled, + ); + let mut pointer_offsets: Vec = vec![]; + let mut checksums: Vec<(usize, usize)> = vec![]; + + let xsdt_addr = rsdp.xsdt_addr.get() as usize; + let xsdt_checksum = (xsdt_addr, table_bytes.len() - xsdt_addr); + + // create pointer offsets (use location of pointers in XSDT table) + // XSDT doesn't have a pointer to DSDT so we use FACP's pointer to DSDT + let facp_offset = xsdt_table_pointers[0] as usize; + pointer_offsets.push(facp_offset + FACP_DSDT_OFFSET); + let mut current_offset = xsdt_addr + 36; + for _ in 0..xsdt_table_pointers.len() { + pointer_offsets.push(current_offset); + current_offset += 8; + } + + // create (offset, len) pairs for firmware to calculate + // table checksums and verify ACPI tables + let mut i = 0; + while i < xsdt_table_pointers.len() - 1 { + let current_table_offset = xsdt_table_pointers[i]; + let current_table_length = xsdt_table_pointers[i + 1] - current_table_offset; + checksums.push((current_table_offset as usize, current_table_length as usize)); + i += 1; + } + checksums.push(( + xsdt_table_pointers[xsdt_table_pointers.len() - 1] as usize, + 0, + )); + checksums.push(xsdt_checksum); + + device_manager + .lock() + .unwrap() + .fw_cfg() + .expect("fw_cfg must be present") + .lock() + .unwrap() + .add_acpi(rsdp, table_bytes, checksums, pointer_offsets) + .map_err(crate::vm::Error::CreatingAcpiTables) +} + pub fn create_acpi_tables( guest_mem: &GuestMemoryMmap, device_manager: &Arc>, diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index c527741b4d..bed0a4363a 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -355,6 +355,10 @@ pub enum Error { #[cfg(feature = "fw_cfg")] #[error("Error creating e820 map")] CreatingE820Map(#[source] io::Error), + + #[cfg(feature = "fw_cfg")] + #[error("Error creating acpi tables")] + CreatingAcpiTables(#[source] io::Error), } pub type Result = result::Result; @@ -2361,10 +2365,21 @@ impl Vm { } else { VmState::Running }; + current_state.valid_transition(new_state)?; #[cfg(feature = "fw_cfg")] - Self::populate_fw_cfg(&self.device_manager, &self.config)?; + { + Self::populate_fw_cfg(&self.device_manager, &self.config)?; + let tpm_enabled = self.config.lock().unwrap().tpm.is_some(); + 
crate::acpi::create_acpi_tables_for_fw_cfg( + &self.device_manager, + &self.cpu_manager, + &self.memory_manager, + &self.numa_nodes, + tpm_enabled, + )? + } // Do earlier to parallelise with loading kernel #[cfg(target_arch = "x86_64")] From edee53ac1aff69cf95cf3cb99a9da8cee595f75d Mon Sep 17 00:00:00 2001 From: Alex Orozco Date: Fri, 18 Apr 2025 16:37:11 +0000 Subject: [PATCH 081/294] devices: Implement DMA for fw_cfg device We pass a reference to the guest memory when we create the device in DeviceManager. This allows us to access the guest memory for DMA. Signed-off-by: Alex Orozco --- Cargo.lock | 1 + devices/Cargo.toml | 3 +- devices/src/legacy/fw_cfg.rs | 265 +++++++++++++++++++++++++++++++++-- vmm/src/device_manager.rs | 4 +- 4 files changed, 257 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e4f4454ece..5a78ae54c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -526,6 +526,7 @@ dependencies = [ "acpi_tables", "anyhow", "arch", + "bitfield-struct", "bitflags 2.9.0", "byteorder", "event_monitor", diff --git a/devices/Cargo.toml b/devices/Cargo.toml index c35f58b880..81776dbc34 100644 --- a/devices/Cargo.toml +++ b/devices/Cargo.toml @@ -8,6 +8,7 @@ version = "0.1.0" acpi_tables = { workspace = true } anyhow = { workspace = true } arch = { path = "../arch" } +bitfield-struct = { version = "0.10.1", optional = true } bitflags = { workspace = true } byteorder = { workspace = true } event_monitor = { path = "../event_monitor" } @@ -43,6 +44,6 @@ arch = { path = "../arch" } [features] default = [] -fw_cfg = ["linux-loader", "zerocopy"] +fw_cfg = ["bitfield-struct", "linux-loader", "zerocopy"] kvm = ["arch/kvm"] pvmemcontrol = [] diff --git a/devices/src/legacy/fw_cfg.rs b/devices/src/legacy/fw_cfg.rs index c12c50d5da..78bf875740 100644 --- a/devices/src/legacy/fw_cfg.rs +++ b/devices/src/legacy/fw_cfg.rs @@ -13,7 +13,7 @@ /// only firmware must implement mechanism to interact with this fw_cfg device use std::{ fs::File, - io::Result, + io::{ErrorKind, Read, Result, Seek, SeekFrom}, mem::offset_of, os::unix::fs::FileExt, sync::{Arc, Barrier}, @@ -30,16 +30,18 @@ use arch::layout::{ MEM_32BIT_RESERVED_START, PCI_MMCONFIG_SIZE, PCI_MMCONFIG_START, RAM_64BIT_START, }; use arch::RegionType; +use bitfield_struct::bitfield; #[cfg(target_arch = "x86_64")] use linux_loader::bootparam::boot_params; #[cfg(target_arch = "aarch64")] use linux_loader::loader::pe::arm64_image_header as boot_params; use vm_device::BusDevice; -use vm_memory::ByteValued; -#[cfg(target_arch = "x86_64")] -use vm_memory::GuestAddress; +use vm_memory::bitmap::AtomicBitmap; +use vm_memory::{ + ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, GuestMemoryMmap, +}; use vmm_sys_util::sock_ctrl_msg::IntoIovec; -use zerocopy::{FromBytes, Immutable, IntoBytes}; +use zerocopy::{FromBytes, FromZeros, Immutable, IntoBytes}; #[cfg(target_arch = "x86_64")] // https://github.com/project-oak/oak/tree/main/stage0_bin#memory-layout @@ -114,6 +116,34 @@ pub enum FwCfgContent { U32(u32), } +struct FwCfgContentAccess<'a> { + content: &'a FwCfgContent, + offset: u32, +} + +impl Read for FwCfgContentAccess<'_> { + fn read(&mut self, buf: &mut [u8]) -> Result { + match self.content { + FwCfgContent::File(offset, f) => { + Seek::seek(&mut (&*f), SeekFrom::Start(offset + self.offset as u64))?; + Read::read(&mut (&*f), buf) + } + FwCfgContent::Bytes(b) => match b.get(self.offset as usize..) 
{ + Some(mut s) => s.read(buf), + None => Err(ErrorKind::UnexpectedEof)?, + }, + FwCfgContent::Slice(b) => match b.get(self.offset as usize..) { + Some(mut s) => s.read(buf), + None => Err(ErrorKind::UnexpectedEof)?, + }, + FwCfgContent::U32(n) => match n.to_le_bytes().get(self.offset as usize..) { + Some(mut s) => s.read(buf), + None => Err(ErrorKind::UnexpectedEof)?, + }, + } + } +} + impl Default for FwCfgContent { fn default() -> Self { FwCfgContent::Slice(&[]) @@ -130,6 +160,12 @@ impl FwCfgContent { }; u32::try_from(ret).map_err(|_| std::io::ErrorKind::InvalidInput.into()) } + fn access(&self, offset: u32) -> FwCfgContentAccess<'_> { + FwCfgContentAccess { + content: self, + offset, + } + } } #[derive(Debug, Default)] @@ -139,12 +175,45 @@ pub struct FwCfgItem { } /// https://www.qemu.org/docs/master/specs/fw_cfg.html -#[derive(Debug, Default)] +#[derive(Debug)] pub struct FwCfg { selector: u16, data_offset: u32, + dma_address: u64, items: Vec, // 0x20 and above known_items: [FwCfgContent; FW_CFG_KNOWN_ITEMS], // 0x0 to 0x19 + memory: GuestMemoryAtomic>, +} + +#[repr(C)] +#[derive(Debug, IntoBytes, FromBytes)] +struct FwCfgDmaAccess { + control_be: u32, + length_be: u32, + address_be: u64, +} + +// https://github.com/torvalds/linux/blob/master/include/uapi/linux/qemu_fw_cfg.h#L67 +#[bitfield(u32)] +struct AccessControl { + // FW_CFG_DMA_CTL_ERROR = 0x01 + error: bool, + // FW_CFG_DMA_CTL_READ = 0x02 + read: bool, + #[bits(1)] + _unused2: u8, + // FW_CFG_DMA_CTL_SKIP = 0x04 + skip: bool, + #[bits(3)] + _unused3: u8, + // FW_CFG_DMA_CTL_ERROR = 0x08 + select: bool, + #[bits(7)] + _unused4: u8, + // FW_CFG_DMA_CTL_WRITE = 0x10 + write: bool, + #[bits(16)] + _unused: u32, } #[repr(C)] @@ -344,7 +413,7 @@ fn create_acpi_loader(acpi_table: AcpiTable) -> [FwCfgItem; 3] { } impl FwCfg { - pub fn new() -> FwCfg { + pub fn new(memory: GuestMemoryAtomic>) -> FwCfg { const DEFAULT_ITEM: FwCfgContent = FwCfgContent::Slice(&[]); let mut known_items = [DEFAULT_ITEM; FW_CFG_KNOWN_ITEMS]; known_items[FW_CFG_SIGNATURE as usize] = FwCfgContent::Slice(&FW_CFG_DMA_SIGNATURE); @@ -355,8 +424,10 @@ impl FwCfg { FwCfg { selector: 0, data_offset: 0, + dma_address: 0, items: vec![], known_items, + memory, } } @@ -448,6 +519,87 @@ impl FwCfg { Ok(()) } + fn dma_read_content( + &self, + content: &FwCfgContent, + offset: u32, + len: u32, + address: u64, + ) -> Result { + let content_size = content.size()?.saturating_sub(offset); + let op_size = std::cmp::min(content_size, len); + let mut access = content.access(offset); + let mut buf = vec![0u8; op_size as usize]; + access.read_exact(buf.as_mut_bytes())?; + let r = self + .memory + .memory() + .write(buf.as_bytes(), GuestAddress(address)); + match r { + Err(e) => { + error!("fw_cfg: dma read error: {e:x?}"); + Err(ErrorKind::InvalidInput.into()) + } + Ok(size) => Ok(size as u32), + } + } + + fn dma_read(&mut self, selector: u16, len: u32, address: u64) -> Result<()> { + let op_size = if let Some(content) = self.known_items.get(selector as usize) { + self.dma_read_content(content, self.data_offset, len, address) + } else if let Some(item) = self.items.get((selector - FW_CFG_FILE_FIRST) as usize) { + self.dma_read_content(&item.content, self.data_offset, len, address) + } else { + error!("fw_cfg: selector {selector:#x} does not exist."); + Err(ErrorKind::NotFound.into()) + }?; + self.data_offset += op_size; + Ok(()) + } + + fn do_dma(&mut self) { + let dma_address = self.dma_address; + let mut access = FwCfgDmaAccess::new_zeroed(); + let dma_access = match self + 
.memory + .memory() + .read(access.as_mut_bytes(), GuestAddress(dma_address)) + { + Ok(_) => access, + Err(e) => { + error!("fw_cfg: invalid address of dma access {dma_address:#x}: {e:?}"); + return; + } + }; + let control = AccessControl(u32::from_be(dma_access.control_be)); + if control.select() { + self.selector = control.select() as u16; + } + let len = u32::from_be(dma_access.length_be); + let addr = u64::from_be(dma_access.address_be); + let ret = if control.read() { + self.dma_read(self.selector, len, addr) + } else if control.write() { + Err(ErrorKind::InvalidInput.into()) + } else if control.skip() { + self.data_offset += len; + Ok(()) + } else { + Err(ErrorKind::InvalidData.into()) + }; + let mut access_resp = AccessControl(0); + if let Err(e) = ret { + error!("fw_cfg: dma operation {dma_access:x?}: {e:x?}"); + access_resp.set_error(true); + } + if let Err(e) = self.memory.memory().write( + &access_resp.0.to_be_bytes(), + GuestAddress(dma_address + core::mem::offset_of!(FwCfgDmaAccess, control_be) as u64), + ) { + error!("fw_cfg: finishing dma: {e:?}") + } + } + pub fn add_kernel_data(&mut self, file: &File) -> Result<()> { let mut buffer = vec![0u8; size_of::()]; file.read_exact_at(&mut buffer, 0)?; @@ -560,10 +712,14 @@ impl BusDevice for FwCfg { } (PORT_FW_CFG_DATA, _) => _ = self.read_data(data, size as u32), (PORT_FW_CFG_DMA_HI, 4) => { - unimplemented!() + let addr = self.dma_address; + let addr_hi = (addr >> 32) as u32; + data.copy_from_slice(&addr_hi.to_be_bytes()); } (PORT_FW_CFG_DMA_LO, 4) => { - unimplemented!() + let addr = self.dma_address; + let addr_lo = (addr & 0xffff_ffff) as u32; + data.copy_from_slice(&addr_lo.to_be_bytes()); } _ => { debug!("fw_cfg: read from unknown port {port:#x}: {size:#x} bytes and offset {offset:#x}."); @@ -587,10 +743,19 @@ impl BusDevice for FwCfg { } (PORT_FW_CFG_DATA, 1) => error!("fw_cfg: data register is read-only."), (PORT_FW_CFG_DMA_HI, 4) => { - unimplemented!() + let mut buf = [0u8; 4]; + buf[..size].copy_from_slice(&data[..size]); + let val = u32::from_be_bytes(buf); + self.dma_address &= 0xffff_ffff; + self.dma_address |= (val as u64) << 32; } (PORT_FW_CFG_DMA_LO, 4) => { - unimplemented!() + let mut buf = [0u8; 4]; + buf[..size].copy_from_slice(&data[..size]); + let val = u32::from_be_bytes(buf); + self.dma_address &= !0xffff_ffff; + self.dma_address |= val as u64; + self.do_dma(); } _ => debug!( "fw_cfg: write to unknown port {port:#x}: {size:#x} bytes and offset {offset:#x} ." 
@@ -617,10 +782,18 @@ mod tests { const DATA_OFFSET: u64 = 1; #[cfg(target_arch = "aarch64")] const DATA_OFFSET: u64 = 0; + #[cfg(target_arch = "x86_64")] + const DMA_OFFSET: u64 = 4; + #[cfg(target_arch = "aarch64")] + const DMA_OFFSET: u64 = 16; #[test] fn test_signature() { - let mut fw_cfg = FwCfg::new(); + let gm = GuestMemoryAtomic::new( + GuestMemoryMmap::from_ranges(&[(GuestAddress(0), RAM_64BIT_START.0 as usize)]).unwrap(), + ); + + let mut fw_cfg = FwCfg::new(gm); let mut data = vec![0u8]; @@ -637,7 +810,11 @@ mod tests { } #[test] fn test_kernel_cmdline() { - let mut fw_cfg = FwCfg::new(); + let gm = GuestMemoryAtomic::new( + GuestMemoryMmap::from_ranges(&[(GuestAddress(0), RAM_64BIT_START.0 as usize)]).unwrap(), + ); + + let mut fw_cfg = FwCfg::new(gm); let cmdline = *b"cmdline\0"; @@ -659,7 +836,11 @@ mod tests { #[test] fn test_initram_fs() { - let mut fw_cfg = FwCfg::new(); + let gm = GuestMemoryAtomic::new( + GuestMemoryMmap::from_ranges(&[(GuestAddress(0), RAM_64BIT_START.0 as usize)]).unwrap(), + ); + + let mut fw_cfg = FwCfg::new(gm); let temp = TempFile::new().unwrap(); let mut temp_file = temp.as_file(); @@ -682,4 +863,60 @@ mod tests { } } } + + #[test] + fn test_dma() { + let code = [ + 0xba, 0xf8, 0x03, 0x00, 0xd8, 0x04, b'0', 0xee, 0xb0, b'\n', 0xee, 0xf4, + ]; + + let content = FwCfgContent::Bytes(code.to_vec()); + + let mem_size = 0x1000; + let load_addr = GuestAddress(0x1000); + let mem: GuestMemoryMmap = + GuestMemoryMmap::from_ranges(&[(load_addr, mem_size)]).unwrap(); + + // Note: In firmware we would just allocate FwCfgDmaAccess struct + // and use address of struct (&) as dma address + let mut access_control = AccessControl(0); + // bit 1 = read access + access_control.set_read(true); + // length of data to access + let length_be = (code.len() as u32).to_be(); + // guest address for data + let code_address = 0x1900_u64; + let address_be = code_address.to_be(); + let mut access = FwCfgDmaAccess { + control_be: access_control.0.to_be(), // bit(1) = read bit + length_be, + address_be, + }; + // access address is where to put the code + let access_address = GuestAddress(load_addr.0); + let address_bytes = access_address.0.to_be_bytes(); + let dma_lo: [u8; 4] = address_bytes[0..4].try_into().unwrap(); + let dma_hi: [u8; 4] = address_bytes[4..8].try_into().unwrap(); + + // writing the FwCfgDmaAccess to mem (this would just be self.dma_access.as_ref() in guest) + let _ = mem.write(access.as_mut_bytes(), access_address); + let mem_m = GuestMemoryAtomic::new(mem.clone()); + let mut fw_cfg = FwCfg::new(mem_m); + let cfg_item = FwCfgItem { + name: "code".to_string(), + content, + }; + let _ = fw_cfg.add_item(cfg_item); + + let mut data = [0u8; 12]; + + let _ = mem.read(&mut data, GuestAddress(code_address)); + assert_ne!(data, code); + + fw_cfg.write(0, SELECTOR_OFFSET, &[FW_CFG_FILE_FIRST as u8, 0]); + fw_cfg.write(0, DMA_OFFSET, &dma_lo); + fw_cfg.write(0, DMA_OFFSET + 4, &dma_hi); + let _ = mem.read(&mut data, GuestAddress(code_address)); + assert_eq!(data, code); + } } diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index afff343354..db9c0792b6 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -1477,7 +1477,9 @@ impl DeviceManager { #[cfg(feature = "fw_cfg")] pub fn create_fw_cfg_device(&mut self) -> Result<(), DeviceManagerError> { - let fw_cfg = Arc::new(Mutex::new(devices::legacy::FwCfg::new())); + let fw_cfg = Arc::new(Mutex::new(devices::legacy::FwCfg::new( + 
self.memory_manager.lock().as_ref().unwrap().guest_memory(), + ))); self.fw_cfg = Some(fw_cfg.clone()); From 971f552e093445f9cc6205e570622fd8033905dc Mon Sep 17 00:00:00 2001 From: Alex Orozco Date: Mon, 19 May 2025 21:38:21 +0000 Subject: [PATCH 082/294] vmm: Add acpi table for fw_cfg device This allows the fw_cfg device to be recognized by the guest linux kernel. This becomes more relavnt in the following cl where I add the option to load files into the guest via fw_cfg. The Linux kernel already has a fw_cfg driver that will automatically load these files under /sys when CONFIG_FW_CFG_SYSFS is enabled in the kernel config For arm we must add fw_cfg to the devices tree Signed-off-by: Alex Orozco --- arch/Cargo.toml | 1 + arch/src/aarch64/fdt.rs | 17 +++++++++++++ arch/src/lib.rs | 3 +++ devices/Cargo.toml | 2 +- devices/src/legacy/fw_cfg.rs | 2 ++ vmm/src/device_manager.rs | 49 +++++++++++++++++++++++++++++++++--- 6 files changed, 69 insertions(+), 5 deletions(-) diff --git a/arch/Cargo.toml b/arch/Cargo.toml index 3e2202f025..4739c14f36 100644 --- a/arch/Cargo.toml +++ b/arch/Cargo.toml @@ -6,6 +6,7 @@ version = "0.1.0" [features] default = [] +fw_cfg = [] kvm = ["hypervisor/kvm"] sev_snp = [] tdx = [] diff --git a/arch/src/aarch64/fdt.rs b/arch/src/aarch64/fdt.rs index 23df4d805a..3b8cfbcc48 100644 --- a/arch/src/aarch64/fdt.rs +++ b/arch/src/aarch64/fdt.rs @@ -850,6 +850,21 @@ fn create_gpio_node( Ok(()) } +// https://www.kernel.org/doc/Documentation/devicetree/bindings/arm/fw-cfg.txt +#[cfg(feature = "fw_cfg")] +fn create_fw_cfg_node( + fdt: &mut FdtWriter, + dev_info: &T, +) -> FdtWriterResult<()> { + // FwCfg node + let fw_cfg_node = fdt.begin_node(&format!("fw-cfg@{:x}", dev_info.addr()))?; + fdt.property("compatible", b"qemu,fw-cfg-mmio\0")?; + fdt.property_array_u64("reg", &[dev_info.addr(), dev_info.length()])?; + fdt.end_node(fw_cfg_node)?; + + Ok(()) +} + fn create_devices_node( fdt: &mut FdtWriter, dev_info: &HashMap<(DeviceType, String), T, S>, @@ -865,6 +880,8 @@ fn create_devices_node { ordered_virtio_device.push(info); } + #[cfg(feature = "fw_cfg")] + DeviceType::FwCfg => create_fw_cfg_node(fdt, info)?, } } diff --git a/arch/src/lib.rs b/arch/src/lib.rs index 333a65d9c4..bbca3e4ea7 100644 --- a/arch/src/lib.rs +++ b/arch/src/lib.rs @@ -155,6 +155,9 @@ pub enum DeviceType { /// Device Type: GPIO. #[cfg(target_arch = "aarch64")] Gpio, + /// Device Type: fw_cfg. + #[cfg(feature = "fw_cfg")] + FwCfg, } /// Default (smallest) memory page size for the supported architectures. 
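With the ACPI device (x86_64) or devicetree node (aarch64) in place, a guest kernel built with CONFIG_FW_CFG_SYSFS binds its qemu_fw_cfg driver and exposes every fw_cfg item under sysfs. A small guest-side sketch follows, assuming a hypothetical item named "opt/org.test/test-file" was added on the host; the path format is the one the integration test later in this series reads.

    use std::fs;
    use std::io;

    // Read the raw payload of a fw_cfg item exposed by the guest's
    // qemu_fw_cfg sysfs driver.
    fn read_fw_cfg_item(name: &str) -> io::Result<Vec<u8>> {
        fs::read(format!("/sys/firmware/qemu_fw_cfg/by_name/{name}/raw"))
    }

    fn main() -> io::Result<()> {
        let bytes = read_fw_cfg_item("opt/org.test/test-file")?;
        println!("{}", String::from_utf8_lossy(&bytes));
        Ok(())
    }
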
diff --git a/devices/Cargo.toml b/devices/Cargo.toml index 81776dbc34..d2fcc4a94a 100644 --- a/devices/Cargo.toml +++ b/devices/Cargo.toml @@ -44,6 +44,6 @@ arch = { path = "../arch" } [features] default = [] -fw_cfg = ["bitfield-struct", "linux-loader", "zerocopy"] +fw_cfg = ["arch/fw_cfg", "bitfield-struct", "linux-loader", "zerocopy"] kvm = ["arch/kvm"] pvmemcontrol = [] diff --git a/devices/src/legacy/fw_cfg.rs b/devices/src/legacy/fw_cfg.rs index 78bf875740..4e96d45359 100644 --- a/devices/src/legacy/fw_cfg.rs +++ b/devices/src/legacy/fw_cfg.rs @@ -91,6 +91,8 @@ const FW_CFG_KNOWN_ITEMS: usize = 0x20; pub const FW_CFG_FILE_FIRST: u16 = 0x20; pub const FW_CFG_DMA_SIGNATURE: [u8; 8] = *b"QEMU CFG"; +// https://github.com/torvalds/linux/blob/master/include/uapi/linux/qemu_fw_cfg.h +pub const FW_CFG_ACPI_ID: &str = "QEMU0002"; // Reserved (must be enabled) const FW_CFG_F_RESERVED: u8 = 1 << 0; // DMA Toggle Bit (enabled by default) diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index db9c0792b6..53e2d1a4ac 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -53,6 +53,8 @@ use devices::gic; use devices::interrupt_controller::InterruptController; #[cfg(target_arch = "x86_64")] use devices::ioapic; +#[cfg(all(feature = "fw_cfg", target_arch = "x86_64"))] +use devices::legacy::fw_cfg::FW_CFG_ACPI_ID; #[cfg(target_arch = "aarch64")] use devices::legacy::Pl011; #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))] @@ -1495,11 +1497,29 @@ impl DeviceManager { // default address for fw_cfg on arm via mmio // https://github.com/torvalds/linux/blob/master/drivers/firmware/qemu_fw_cfg.c#L27 #[cfg(target_arch = "aarch64")] - self.address_manager - .mmio_bus - .insert(fw_cfg.clone(), PORT_FW_CFG_BASE, PORT_FW_CFG_WIDTH) - .map_err(DeviceManagerError::ErrorAddingFwCfgToBus)?; + { + self.address_manager + .mmio_bus + .insert(fw_cfg.clone(), PORT_FW_CFG_BASE, PORT_FW_CFG_WIDTH) + .map_err(DeviceManagerError::ErrorAddingFwCfgToBus)?; + let fw_cfg_irq = self + .address_manager + .allocator + .lock() + .unwrap() + .allocate_irq() + .unwrap(); + + self.id_to_dev_info.insert( + (DeviceType::FwCfg, "fw-cfg".to_string()), + MmioDeviceInfo { + addr: PORT_FW_CFG_BASE, + len: PORT_FW_CFG_WIDTH, + irq: fw_cfg_irq, + }, + ); + } Ok(()) } @@ -5003,6 +5023,27 @@ impl Aml for DeviceManager { ) .to_aml_bytes(sink); + #[cfg(all(feature = "fw_cfg", target_arch = "x86_64"))] + if self.fw_cfg.is_some() { + aml::Device::new( + "_SB_.FWCF".into(), + vec![ + &aml::Name::new("_HID".into(), &FW_CFG_ACPI_ID.to_string()), + &aml::Name::new("_STA".into(), &0xB_usize), + &aml::Name::new( + "_CRS".into(), + &aml::ResourceTemplate::new(vec![&aml::IO::new( + PORT_FW_CFG_BASE as u16, + PORT_FW_CFG_BASE as u16, + 0x01, + PORT_FW_CFG_WIDTH as u8, + )]), + ), + ], + ) + .to_aml_bytes(sink); + } + // Serial device #[cfg(target_arch = "x86_64")] let serial_irq = 4; From a70c1b38e7681fb2cf9480176401705b1086d9b4 Mon Sep 17 00:00:00 2001 From: Alex Orozco Date: Mon, 19 May 2025 21:41:41 +0000 Subject: [PATCH 083/294] devices: Add fw_cfg cli options This allows us to enable/disable the fw_cfg device via the cli We can also now upload files into the guest vm using fw_cfg_items via the cli Signed-off-by: Alex Orozco --- devices/src/legacy/fw_cfg.rs | 28 +++++ src/main.rs | 10 ++ vmm/src/config.rs | 183 ++++++++++++++++++++++++++++++- vmm/src/lib.rs | 2 + vmm/src/vm.rs | 206 ++++++++++++++++++++++------------- vmm/src/vm_config.rs | 75 +++++++++++++ 6 files changed, 429 insertions(+), 75 deletions(-) 
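Usage sketch: the new flag takes an option string such as --fw-cfg-config "initramfs=off,items=[name=opt/org.test/test-file,file=/tmp/test-file]". Assuming the FwCfgConfig, FwCfgItemList and FwCfgItem types introduced below, such a string is expected to parse as follows (any boolean that is not given defaults to on), mirroring the unit test added in vmm/src/config.rs.

    use std::path::PathBuf;

    // Sketch only: relies on the FwCfgConfig / FwCfgItemList / FwCfgItem
    // types and the crate-local Result alias introduced in this patch.
    fn example() -> Result<()> {
        let parsed = FwCfgConfig::parse(
            "initramfs=off,items=[name=opt/org.test/test-file,file=/tmp/test-file]",
        )?;
        // Booleans that are not given default to "on".
        assert!(parsed.e820 && parsed.kernel && parsed.cmdline && parsed.acpi_tables);
        assert!(!parsed.initramfs);
        assert_eq!(
            parsed.items,
            Some(FwCfgItemList {
                item_list: vec![FwCfgItem {
                    name: "opt/org.test/test-file".to_string(),
                    file: PathBuf::from("/tmp/test-file"),
                }],
            })
        );
        Ok(())
    }

The integration test added later in this series passes essentially the same option string on the command line.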
diff --git a/devices/src/legacy/fw_cfg.rs b/devices/src/legacy/fw_cfg.rs index 4e96d45359..02c52c707f 100644 --- a/devices/src/legacy/fw_cfg.rs +++ b/devices/src/legacy/fw_cfg.rs @@ -433,6 +433,34 @@ impl FwCfg { } } + pub fn populate_fw_cfg( + &mut self, + mem_size: Option, + kernel: Option, + initramfs: Option, + cmdline: Option, + fw_cfg_item_list: Option>, + ) -> Result<()> { + if let Some(mem_size) = mem_size { + self.add_e820(mem_size)? + } + if let Some(kernel) = kernel { + self.add_kernel_data(&kernel)?; + } + if let Some(cmdline) = cmdline { + self.add_kernel_cmdline(cmdline); + } + if let Some(initramfs) = initramfs { + self.add_initramfs_data(&initramfs)? + } + if let Some(fw_cfg_item_list) = fw_cfg_item_list { + for item in fw_cfg_item_list { + self.add_item(item)?; + } + } + Ok(()) + } + pub fn add_e820(&mut self, mem_size: usize) -> Result<()> { #[cfg(target_arch = "x86_64")] let mut mem_regions = vec![ diff --git a/src/main.rs b/src/main.rs index bf0d9089d3..4ba766e055 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,6 +27,8 @@ use vmm::api::ApiAction; use vmm::config::{RestoreConfig, VmParams}; use vmm::landlock::{Landlock, LandlockError}; use vmm::vm_config; +#[cfg(feature = "fw_cfg")] +use vmm::vm_config::FwCfgConfig; #[cfg(target_arch = "x86_64")] use vmm::vm_config::SgxEpcConfig; use vmm::vm_config::{ @@ -269,6 +271,12 @@ fn get_cli_options_sorted( .help(FsConfig::SYNTAX) .num_args(1..) .group("vm-config"), + #[cfg(feature = "fw_cfg")] + Arg::new("fw-cfg-config") + .long("fw-cfg-config") + .help(FwCfgConfig::SYNTAX) + .num_args(1) + .group("vm-payload"), #[cfg(feature = "guest_debug")] Arg::new("gdb") .long("gdb") @@ -979,6 +987,8 @@ mod unit_tests { igvm: None, #[cfg(feature = "sev_snp")] host_data: None, + #[cfg(feature = "fw_cfg")] + fw_cfg_config: None, }), rate_limit_groups: None, disks: None, diff --git a/vmm/src/config.rs b/vmm/src/config.rs index b2d940a66b..28d8cc6d8c 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -163,6 +163,10 @@ pub enum Error { /// Missing fields in Landlock rules #[error("Error parsing --landlock-rules: path/access field missing")] ParseLandlockMissingFields, + #[cfg(feature = "fw_cfg")] + /// Failed Parsing FwCfgItem config + #[error("Error parsing --fw-cfg-config items")] + ParseFwCfgItem(#[source] OptionParserError), } #[derive(Debug, PartialEq, Eq, Error)] @@ -318,6 +322,18 @@ pub enum ValidationError { /// Invalid block device serial length #[error("Block device serial length ({0}) exceeds maximum allowed length ({1})")] InvalidSerialLength(usize, usize), + #[cfg(feature = "fw_cfg")] + /// FwCfg missing kernel + #[error("Error --fw-cfg-config: missing --kernel")] + FwCfgMissingKernel, + #[cfg(feature = "fw_cfg")] + /// FwCfg missing cmdline + #[error("Error --fw-cfg-config: missing --cmdline")] + FwCfgMissingCmdline, + #[cfg(feature = "fw_cfg")] + /// FwCfg missing initramfs + #[error("Error --fw-cfg-config: missing --initramfs")] + FwCfgMissingInitramfs, } type ValidationResult = std::result::Result; @@ -373,6 +389,8 @@ pub struct VmParams<'a> { pub host_data: Option<&'a str>, pub landlock_enable: bool, pub landlock_rules: Option>, + #[cfg(feature = "fw_cfg")] + pub fw_cfg_config: Option<&'a str>, } impl<'a> VmParams<'a> { @@ -444,7 +462,9 @@ impl<'a> VmParams<'a> { let landlock_rules: Option> = args .get_many::("landlock-rules") .map(|x| x.map(|y| y as &str).collect()); - + #[cfg(feature = "fw_cfg")] + let fw_cfg_config: Option<&str> = + args.get_one::("fw-cfg-config").map(|x| x as &str); VmParams { cpus, memory, @@ 
-486,6 +506,8 @@ impl<'a> VmParams<'a> { host_data, landlock_enable, landlock_rules, + #[cfg(feature = "fw_cfg")] + fw_cfg_config, } } } @@ -1603,6 +1625,102 @@ impl FsConfig { } } +#[cfg(feature = "fw_cfg")] +impl FwCfgConfig { + pub const SYNTAX: &'static str = "Boot params to pass to FW CFG device \ + \"e820=on|off,kernel=on|off,cmdline=on|off,initramfs=on|off,acpi_table=on|off, \ + items=[name0=,file0=:name1=,file1=]\""; + pub fn parse(fw_cfg_config: &str) -> Result { + let mut parser = OptionParser::new(); + parser + .add("e820") + .add("kernel") + .add("cmdline") + .add("initramfs") + .add("acpi_table") + .add("items"); + parser.parse(fw_cfg_config).map_err(Error::ParseFwCfgItem)?; + let e820 = parser + .convert::("e820") + .map_err(Error::ParseFwCfgItem)? + .unwrap_or(Toggle(true)) + .0; + let kernel = parser + .convert::("kernel") + .map_err(Error::ParseFwCfgItem)? + .unwrap_or(Toggle(true)) + .0; + let cmdline = parser + .convert::("cmdline") + .map_err(Error::ParseFwCfgItem)? + .unwrap_or(Toggle(true)) + .0; + let initramfs = parser + .convert::("initramfs") + .map_err(Error::ParseFwCfgItem)? + .unwrap_or(Toggle(true)) + .0; + let acpi_tables = parser + .convert::("acpi_table") + .map_err(Error::ParseFwCfgItem)? + .unwrap_or(Toggle(true)) + .0; + let items = if parser.is_set("items") { + Some( + parser + .convert::("items") + .map_err(Error::ParseFwCfgItem)? + .unwrap(), + ) + } else { + None + }; + + Ok(FwCfgConfig { + e820, + kernel, + cmdline, + initramfs, + acpi_tables, + items, + }) + } + pub fn validate(&self, vm_config: &VmConfig) -> ValidationResult<()> { + let payload = vm_config.payload.as_ref().unwrap(); + if self.kernel && payload.kernel.is_none() { + return Err(ValidationError::FwCfgMissingKernel); + } else if self.cmdline && payload.cmdline.is_none() { + return Err(ValidationError::FwCfgMissingCmdline); + } else if self.initramfs && payload.initramfs.is_none() { + return Err(ValidationError::FwCfgMissingInitramfs); + } + Ok(()) + } +} + +#[cfg(feature = "fw_cfg")] +impl FwCfgItem { + pub fn parse(fw_cfg: &str) -> Result { + let mut parser = OptionParser::new(); + parser.add("name").add("file"); + parser.parse(fw_cfg).map_err(Error::ParseFwCfgItem)?; + + let name = + parser + .get("name") + .ok_or(Error::ParseFwCfgItem(OptionParserError::InvalidValue( + "missing FwCfgItem name".to_string(), + )))?; + let file = parser + .get("file") + .map(PathBuf::from) + .ok_or(Error::ParseFwCfgItem(OptionParserError::InvalidValue( + "missing FwCfgItem file path".to_string(), + )))?; + Ok(FwCfgItem { name, file }) + } +} + impl PmemConfig { pub const SYNTAX: &'static str = "Persistent memory parameters \ \"file=,size=,iommu=on|off,\ @@ -2661,6 +2779,14 @@ impl VmConfig { disks = Some(disk_config_list); } + #[cfg(feature = "fw_cfg")] + let fw_cfg_config = if let Some(fw_cfg_config_str) = vm_params.fw_cfg_config { + let fw_cfg_config = FwCfgConfig::parse(fw_cfg_config_str)?; + Some(fw_cfg_config) + } else { + None + }; + let mut net: Option> = None; if let Some(net_list) = &vm_params.net { let mut net_config_list = Vec::new(); @@ -2797,6 +2923,8 @@ impl VmConfig { igvm: vm_params.igvm.map(PathBuf::from), #[cfg(feature = "sev_snp")] host_data: vm_params.host_data.map(|s| s.to_string()), + #[cfg(feature = "fw_cfg")] + fw_cfg_config, }) } else { None @@ -3939,6 +4067,8 @@ mod tests { host_data: Some( "243eb7dc1a21129caa91dcbb794922b933baecb5823a377eb431188673288c07".to_string(), ), + #[cfg(feature = "fw_cfg")] + fw_cfg_config: None, }), rate_limit_groups: None, disks: None, @@ -4556,6 
+4686,8 @@ mod tests { igvm: None, #[cfg(feature = "sev_snp")] host_data: Some("".to_string()), + #[cfg(feature = "fw_cfg")] + fw_cfg_config: None, }); config_with_no_host_data.validate().unwrap_err(); @@ -4570,6 +4702,8 @@ mod tests { igvm: None, #[cfg(feature = "sev_snp")] host_data: None, + #[cfg(feature = "fw_cfg")] + fw_cfg_config: None, }); valid_config_with_no_host_data.validate().unwrap(); @@ -4586,6 +4720,8 @@ mod tests { host_data: Some( "243eb7dc1a21129caa91dcbb794922b933baecb5823a377eb43118867328".to_string(), ), + #[cfg(feature = "fw_cfg")] + fw_cfg_config: None, }); config_with_invalid_host_data.validate().unwrap_err(); } @@ -4617,4 +4753,49 @@ mod tests { ); Ok(()) } + #[test] + #[cfg(feature = "fw_cfg")] + fn test_fw_cfg_config_item_list_parsing() -> Result<()> { + // Empty list + FwCfgConfig::parse("items=[]").unwrap_err(); + // Missing closing bracket + FwCfgConfig::parse("items=[name=opt/org.test/fw_cfg_test_item,file=/tmp/fw_cfg_test_item") + .unwrap_err(); + // Single Item + assert_eq!( + FwCfgConfig::parse( + "items=[name=opt/org.test/fw_cfg_test_item,file=/tmp/fw_cfg_test_item]" + )?, + FwCfgConfig { + items: Some(FwCfgItemList { + item_list: vec![FwCfgItem { + name: "opt/org.test/fw_cfg_test_item".to_string(), + file: PathBuf::from("/tmp/fw_cfg_test_item"), + }] + }), + ..Default::default() + }, + ); + // Multiple Items + assert_eq!( + FwCfgConfig::parse( + "items=[name=opt/org.test/fw_cfg_test_item,file=/tmp/fw_cfg_test_item:name=opt/org.test/fw_cfg_test_item2,file=/tmp/fw_cfg_test_item2]" + )?, + FwCfgConfig { + items: Some(FwCfgItemList { + item_list: vec![FwCfgItem { + name: "opt/org.test/fw_cfg_test_item".to_string(), + file: PathBuf::from("/tmp/fw_cfg_test_item"), + }, + FwCfgItem { + name: "opt/org.test/fw_cfg_test_item2".to_string(), + file: PathBuf::from("/tmp/fw_cfg_test_item2"), + }] + }), + ..Default::default() + }, + + ); + Ok(()) + } } diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index ef99fb31a6..e51573159b 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -2391,6 +2391,8 @@ mod unit_tests { igvm: None, #[cfg(feature = "sev_snp")] host_data: None, + #[cfg(feature = "fw_cfg")] + fw_cfg_config: None, }), rate_limit_groups: None, disks: None, diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index bed0a4363a..4935bc2e9f 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -34,6 +34,8 @@ use arch::PciSpaceInfo; use arch::{get_host_cpu_phys_bits, EntryPoint, NumaNode, NumaNodes}; #[cfg(target_arch = "aarch64")] use devices::interrupt_controller; +#[cfg(feature = "fw_cfg")] +use devices::legacy::fw_cfg::FwCfgItem; use devices::AcpiNotificationFlags; #[cfg(all(target_arch = "aarch64", feature = "guest_debug"))] use gdbstub_arch::aarch64::reg::AArch64CoreRegs as CoreRegs; @@ -91,6 +93,8 @@ use crate::migration::get_vm_snapshot; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use crate::migration::url_to_file; use crate::migration::{url_to_path, SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE}; +#[cfg(feature = "fw_cfg")] +use crate::vm_config::FwCfgConfig; use crate::vm_config::{ DeviceConfig, DiskConfig, FsConfig, HotplugMethod, NetConfig, NumaConfig, PayloadConfig, PmemConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, @@ -359,6 +363,18 @@ pub enum Error { #[cfg(feature = "fw_cfg")] #[error("Error creating acpi tables")] CreatingAcpiTables(#[source] io::Error), + + #[cfg(feature = "fw_cfg")] + #[error("Error adding fw_cfg item")] + AddingFwCfgItem(#[source] io::Error), + + #[cfg(feature = "fw_cfg")] + #[error("Error populating fw_cfg")] + 
ErrorPopulatingFwCfg(#[source] io::Error), + + #[cfg(feature = "fw_cfg")] + #[error("Error using fw_cfg while disabled")] + FwCfgDisabled, } pub type Result = result::Result; @@ -741,11 +757,22 @@ impl Vm { } #[cfg(feature = "fw_cfg")] - device_manager - .lock() - .unwrap() - .create_fw_cfg_device() - .map_err(Error::DeviceManager)?; + { + let fw_cfg_config = config + .lock() + .unwrap() + .payload + .as_ref() + .map(|p| p.fw_cfg_config.is_some()) + .unwrap_or(false); + if fw_cfg_config { + device_manager + .lock() + .unwrap() + .create_fw_cfg_device() + .map_err(Error::DeviceManager)?; + } + } #[cfg(feature = "tdx")] let kernel = config @@ -806,76 +833,85 @@ impl Vm { #[cfg(feature = "fw_cfg")] fn populate_fw_cfg( + fw_cfg_config: &FwCfgConfig, device_manager: &Arc>, config: &Arc>, ) -> Result<()> { - device_manager - .lock() - .unwrap() - .fw_cfg() - .expect("fw_cfg device must be present") - .lock() - .unwrap() - .add_e820(config.lock().unwrap().memory.size as usize) - .map_err(Error::CreatingE820Map)?; - let kernel = config - .lock() - .unwrap() - .payload - .as_ref() - .map(|p| p.kernel.as_ref().map(File::open)) - .unwrap_or_default() - .transpose() - .map_err(Error::MissingFwCfgKernelFile)?; - if let Some(kernel_file) = kernel { - device_manager - .lock() - .unwrap() - .fw_cfg() - .expect("fw_cfg device must be present") - .lock() - .unwrap() - .add_kernel_data(&kernel_file) - .map_err(Error::MissingFwCfgKernelFile)? + let mut e820_option: Option = None; + if fw_cfg_config.e820 { + e820_option = Some(config.lock().unwrap().memory.size as usize); } - let cmdline = Vm::generate_cmdline( - config.lock().unwrap().payload.as_ref().unwrap(), - #[cfg(target_arch = "aarch64")] - device_manager, - ) - .map_err(|_| Error::MissingFwCfgCmdline)? - .as_cstring() - .map_err(|_| Error::MissingFwCfgCmdline)?; - device_manager - .lock() - .unwrap() - .fw_cfg() - .expect("fw_cfg device must be present") - .lock() - .unwrap() - .add_kernel_cmdline(cmdline); - let initramfs = config - .lock() - .unwrap() - .payload - .as_ref() - .map(|p| p.initramfs.as_ref().map(File::open)) - .unwrap_or_default() - .transpose() - .map_err(Error::MissingFwCfgInitramfs)?; - // We measure the initramfs when running Oak Containers in SNP mode (initramfs = Stage1) - // o/w use Stage0 to launch cloud disk images - if let Some(initramfs_file) = initramfs { - device_manager + let mut kernel_option: Option = None; + if fw_cfg_config.kernel { + let kernel = config .lock() .unwrap() - .fw_cfg() - .expect("fw_cfg device must be present") + .payload + .as_ref() + .map(|p| p.kernel.as_ref().map(File::open)) + .unwrap_or_default() + .transpose() + .map_err(Error::MissingFwCfgKernelFile)?; + kernel_option = kernel; + } + let mut cmdline_option: Option = None; + if fw_cfg_config.cmdline { + let cmdline = Vm::generate_cmdline( + config.lock().unwrap().payload.as_ref().unwrap(), + #[cfg(target_arch = "aarch64")] + device_manager, + ) + .map_err(|_| Error::MissingFwCfgCmdline)? 
+ .as_cstring() + .map_err(|_| Error::MissingFwCfgCmdline)?; + cmdline_option = Some(cmdline); + } + let mut initramfs_option: Option = None; + if fw_cfg_config.initramfs { + let initramfs = config .lock() .unwrap() - .add_initramfs_data(&initramfs_file) + .payload + .as_ref() + .map(|p| p.initramfs.as_ref().map(File::open)) + .unwrap_or_default() + .transpose() .map_err(Error::MissingFwCfgInitramfs)?; + // We measure the initramfs when running Oak Containers in SNP mode (initramfs = Stage1) + // o/w use Stage0 to launch cloud disk images + initramfs_option = initramfs; + } + let mut fw_cfg_item_list_option: Option> = None; + if let Some(fw_cfg_files) = &fw_cfg_config.items { + let mut fw_cfg_item_list = vec![]; + for fw_cfg_file in fw_cfg_files.item_list.clone() { + fw_cfg_item_list.push(FwCfgItem { + name: fw_cfg_file.name, + content: devices::legacy::fw_cfg::FwCfgContent::File( + 0, + File::open(fw_cfg_file.file).map_err(Error::AddingFwCfgItem)?, + ), + }); + } + fw_cfg_item_list_option = Some(fw_cfg_item_list); } + + let device_manager_binding = device_manager.lock().unwrap(); + let Some(fw_cfg) = device_manager_binding.fw_cfg() else { + return Err(Error::FwCfgDisabled); + }; + + fw_cfg + .lock() + .unwrap() + .populate_fw_cfg( + e820_option, + kernel_option, + initramfs_option, + cmdline_option, + fw_cfg_item_list_option, + ) + .map_err(Error::ErrorPopulatingFwCfg)?; Ok(()) } @@ -2370,15 +2406,37 @@ impl Vm { #[cfg(feature = "fw_cfg")] { - Self::populate_fw_cfg(&self.device_manager, &self.config)?; - let tpm_enabled = self.config.lock().unwrap().tpm.is_some(); - crate::acpi::create_acpi_tables_for_fw_cfg( - &self.device_manager, - &self.cpu_manager, - &self.memory_manager, - &self.numa_nodes, - tpm_enabled, - )? + let fw_cfg_enabled = self + .config + .lock() + .unwrap() + .payload + .as_ref() + .map(|p| p.fw_cfg_config.is_some()) + .unwrap_or(false); + if fw_cfg_enabled { + let fw_cfg_config = self + .config + .lock() + .unwrap() + .payload + .as_ref() + .map(|p| p.fw_cfg_config.clone()) + .unwrap_or_default() + .ok_or(Error::VmMissingConfig)?; + Self::populate_fw_cfg(&fw_cfg_config, &self.device_manager, &self.config)?; + + if fw_cfg_config.acpi_tables { + let tpm_enabled = self.config.lock().unwrap().tpm.is_some(); + crate::acpi::create_acpi_tables_for_fw_cfg( + &self.device_manager, + &self.cpu_manager, + &self.memory_manager, + &self.numa_nodes, + tpm_enabled, + )? 
+ } + } } // Do earlier to parallelise with loading kernel diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index 3e6cf447da..2d829a678a 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -4,6 +4,8 @@ // use std::net::{IpAddr, Ipv4Addr}; use std::path::PathBuf; +#[cfg(feature = "fw_cfg")] +use std::str::FromStr; use std::{fs, result}; use net_util::MacAddr; @@ -699,6 +701,79 @@ pub struct PayloadConfig { #[cfg(feature = "sev_snp")] #[serde(default)] pub host_data: Option, + #[cfg(feature = "fw_cfg")] + pub fw_cfg_config: Option, +} + +#[cfg(feature = "fw_cfg")] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct FwCfgConfig { + pub e820: bool, + pub kernel: bool, + pub cmdline: bool, + pub initramfs: bool, + pub acpi_tables: bool, + pub items: Option, +} + +#[cfg(feature = "fw_cfg")] +impl Default for FwCfgConfig { + fn default() -> Self { + FwCfgConfig { + e820: true, + kernel: true, + cmdline: true, + initramfs: true, + acpi_tables: true, + items: None, + } + } +} + +#[cfg(feature = "fw_cfg")] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct FwCfgItemList { + #[serde(default)] + pub item_list: Vec, +} + +#[cfg(feature = "fw_cfg")] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct FwCfgItem { + #[serde(default)] + pub name: String, + #[serde(default)] + pub file: PathBuf, +} + +#[cfg(feature = "fw_cfg")] +pub enum FwCfgItemError { + InvalidValue(String), +} + +#[cfg(feature = "fw_cfg")] +impl FromStr for FwCfgItemList { + type Err = FwCfgItemError; + + fn from_str(s: &str) -> Result { + let body = s + .trim() + .strip_prefix('[') + .and_then(|s| s.strip_suffix(']')) + .ok_or_else(|| FwCfgItemError::InvalidValue(s.to_string()))?; + + let mut fw_cfg_items: Vec = vec![]; + let items: Vec<&str> = body.split(':').collect(); + for item in items { + fw_cfg_items.push( + FwCfgItem::parse(item) + .map_err(|_| FwCfgItemError::InvalidValue(item.to_string()))?, + ); + } + Ok(FwCfgItemList { + item_list: fw_cfg_items, + }) + } } impl ApplyLandlock for PayloadConfig { From 5d478c534ed38168cf7cc83485ee05e5c00d5e24 Mon Sep 17 00:00:00 2001 From: Alex Orozco Date: Thu, 12 Jun 2025 20:33:03 +0000 Subject: [PATCH 084/294] tests: Add fw_cfg device integration test This test verifies that we can see custom items added to the fw_cfg device from inside the guest Signed-off-by: Alex Orozco --- .github/workflows/build.yaml | 3 ++ .github/workflows/quality.yaml | 7 +++- scripts/run_integration_tests_aarch64.sh | 8 ++++ scripts/run_integration_tests_x86_64.sh | 8 ++++ tests/integration.rs | 52 ++++++++++++++++++++++++ 5 files changed, 77 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 297b2e153d..5c32406646 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -52,6 +52,9 @@ jobs: - name: Build (default features + pvmemcontrol) run: cargo rustc --locked --bin cloud-hypervisor --features "pvmemcontrol" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + - name: Build (default features + fw_cfg) + run: cargo rustc --locked --bin cloud-hypervisor --features "fw_cfg" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + - name: Build (mshv) run: cargo rustc --locked --bin cloud-hypervisor --no-default-features --features "mshv" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states diff --git 
a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index b4cddb3ce9..576acaffa9 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -111,7 +111,12 @@ jobs: toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} args: --locked --all --all-targets --tests --examples --features "tracing" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states - + - name: Clippy (default features + fw_cfg) + uses: actions-rs/cargo@v1 + with: + use-cross: ${{ matrix.target != 'x86_64-unknown-linux-gnu' }} + command: clippy + args: --target=${{ matrix.target }} --locked --all --all-targets --tests --examples --features "fw_cfg" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states - name: Clippy (sev_snp) if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} uses: houseabsolute/actions-rust-cross@v1 diff --git a/scripts/run_integration_tests_aarch64.sh b/scripts/run_integration_tests_aarch64.sh index 0daa672e84..262faff9a4 100755 --- a/scripts/run_integration_tests_aarch64.sh +++ b/scripts/run_integration_tests_aarch64.sh @@ -250,4 +250,12 @@ if [ $RES -eq 0 ]; then RES=$? fi +# Run tests on fw_cfg +if [ $RES -eq 0 ]; then + cargo build --features "fw_cfg" --all --release --target "$BUILD_TARGET" + export RUST_BACKTRACE=1 + time cargo test "fw_cfg::$test_filter" --target "$BUILD_TARGET" -- ${test_binary_args[*]} + RES=$? +fi + exit $RES diff --git a/scripts/run_integration_tests_x86_64.sh b/scripts/run_integration_tests_x86_64.sh index 4fec7d04c9..4f4491aa7f 100755 --- a/scripts/run_integration_tests_x86_64.sh +++ b/scripts/run_integration_tests_x86_64.sh @@ -198,4 +198,12 @@ if [ $RES -eq 0 ]; then RES=$? fi +# Run tests on fw_cfg +if [ $RES -eq 0 ]; then + cargo build --features "mshv,fw_cfg" --all --release --target "$BUILD_TARGET" + export RUST_BACKTRACE=1 + time cargo test "fw_cfg::$test_filter" --target "$BUILD_TARGET" -- ${test_binary_args[*]} + RES=$? 
+fi + exit $RES diff --git a/tests/integration.rs b/tests/integration.rs index d754ccff8a..dc19b7aee2 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -11069,3 +11069,55 @@ mod rate_limiter { _test_rate_limiter_group_block(false, 2, 2); } } + +#[cfg(not(target_arch = "riscv64"))] +mod fw_cfg { + use crate::*; + + #[test] + fn test_fw_cfg() { + let jammy = UbuntuDiskConfig::new(JAMMY_IMAGE_NAME.to_string()); + let guest = Guest::new(Box::new(jammy)); + let mut cmd = GuestCommand::new(&guest); + + let kernel_path = direct_kernel_boot_path(); + let cmd_line = DIRECT_KERNEL_BOOT_CMDLINE; + + let test_file = guest.tmp_dir.as_path().join("test-file"); + std::fs::write(&test_file, "test-file-content").unwrap(); + + cmd.args(["--cpus", "boot=4"]) + .args(["--memory", "size=512M"]) + .args(["--kernel", kernel_path.to_str().unwrap()]) + .args(["--cmdline", cmd_line]) + .default_disks() + .default_net() + .args([ + "--fw-cfg-config", + &format!( + "initramfs=off,items=[name=opt/org.test/test-file,file={}]", + test_file.to_str().unwrap() + ), + ]) + .capture_output(); + + let mut child = cmd.spawn().unwrap(); + + let r = std::panic::catch_unwind(|| { + guest.wait_vm_boot(None).unwrap(); + // Wait a while for guest + thread::sleep(std::time::Duration::new(3, 0)); + let result = guest + .ssh_command( + "sudo cat /sys/firmware/qemu_fw_cfg/by_name/opt/org.test/test-file/raw", + ) + .unwrap(); + assert_eq!(result, "test-file-content"); + }); + + kill_child(&mut child); + let output = child.wait_with_output().unwrap(); + + handle_child_output(r, &output); + } +} From aa8e9cd91a026646d24481725042e045a0bc9d5c Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Tue, 29 Jul 2025 19:02:20 +0000 Subject: [PATCH 085/294] misc: Change cpu ID type from u8 to u32 This is the first change to Cloud Hypervisor in a series of changes intended to increase the max number of supported vCPUs in guest VMs, which is currently limited to 255 (254 on x86_64). No user-visible/behavior changes are expected as a result of applying this patch, as the type of boot_cpus and related fields in config structs remains u8 for now, and all configuration validations remain the same. 
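For illustration, here is a minimal, self-contained sketch (not code taken from this patch) of the conversion pattern used at the u8 config boundary: internal plumbing now carries u32 vCPU ids, widening casts happen where the u8 config values enter, and checked narrowing (try_into().unwrap()) happens where a u32 flows back into the still-u8 config fields. The CpusConfig/CpuManager shapes below are simplified stand-ins for the real types.

// Simplified stand-ins for the real config/manager types.
struct CpusConfig {
    boot_vcpus: u8, // config fields stay u8 in this patch
    max_vcpus: u8,
}

struct CpuManager {
    config: CpusConfig,
}

impl CpuManager {
    // Widening u8 -> u32 is lossless, so a plain cast is enough.
    fn boot_vcpus(&self) -> u32 {
        self.config.boot_vcpus as u32
    }

    fn max_vcpus(&self) -> u32 {
        self.config.max_vcpus as u32
    }

    // Narrowing u32 -> u8 is checked: validation still caps the value to
    // u8 range, so the unwrap should not fire until the limits are raised.
    fn resize(&mut self, desired_vcpus: u32) -> Result<(), &'static str> {
        if desired_vcpus > self.max_vcpus() {
            return Err("desired vCPU count exceeds maximum");
        }
        self.config.boot_vcpus = desired_vcpus.try_into().unwrap();
        Ok(())
    }
}

fn main() {
    let mut mgr = CpuManager {
        config: CpusConfig { boot_vcpus: 4, max_vcpus: 8 },
    };
    assert_eq!(mgr.boot_vcpus(), 4);
    mgr.resize(6).unwrap();
    assert_eq!(mgr.boot_vcpus(), 6);
    assert!(mgr.resize(16).is_err());
}

The real code reports Error::DesiredVCpuCountExceedsMax rather than a string error; the sketch only shows where the widening and narrowing conversions sit.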
Signed-off-by: Barret Rhoden Signed-off-by: Neel Natu Signed-off-by: Ofir Weisse Signed-off-by: Peter Oskolkov --- arch/src/aarch64/fdt.rs | 2 +- arch/src/aarch64/mod.rs | 2 +- arch/src/lib.rs | 2 +- arch/src/riscv64/mod.rs | 2 +- arch/src/x86_64/mod.rs | 10 +-- arch/src/x86_64/mptable.rs | 23 +++---- devices/src/aia.rs | 2 +- devices/src/gic.rs | 2 +- fuzz/Cargo.lock | 28 +++----- fuzz/fuzz_targets/http_api.rs | 2 +- hypervisor/src/cpu.rs | 4 +- hypervisor/src/kvm/mod.rs | 8 +-- hypervisor/src/mshv/mod.rs | 7 +- hypervisor/src/vm.rs | 2 +- src/bin/ch-remote.rs | 2 +- vmm/src/acpi.rs | 4 +- vmm/src/api/mod.rs | 4 +- vmm/src/cpu.rs | 117 ++++++++++++++++++---------------- vmm/src/device_manager.rs | 4 +- vmm/src/lib.rs | 4 +- vmm/src/vm.rs | 8 +-- 21 files changed, 116 insertions(+), 123 deletions(-) diff --git a/arch/src/aarch64/fdt.rs b/arch/src/aarch64/fdt.rs index 3b8cfbcc48..238ad0b32a 100644 --- a/arch/src/aarch64/fdt.rs +++ b/arch/src/aarch64/fdt.rs @@ -370,7 +370,7 @@ fn create_cpu_nodes( if numa_nodes.len() > 1 { for numa_node_idx in 0..numa_nodes.len() { let numa_node = numa_nodes.get(&(numa_node_idx as u32)); - if numa_node.unwrap().cpus.contains(&(cpu_id as u8)) { + if numa_node.unwrap().cpus.contains(&(cpu_id as u32)) { fdt.property_u32("numa-node-id", numa_node_idx as u32)?; } } diff --git a/arch/src/aarch64/mod.rs b/arch/src/aarch64/mod.rs index 51f51ccaf6..c807429146 100644 --- a/arch/src/aarch64/mod.rs +++ b/arch/src/aarch64/mod.rs @@ -67,7 +67,7 @@ pub struct EntryPoint { /// Configure the specified VCPU, and return its MPIDR. pub fn configure_vcpu( vcpu: &Arc, - id: u8, + id: u32, boot_setup: Option<(EntryPoint, &GuestMemoryAtomic)>, ) -> super::Result { if let Some((kernel_entry_point, _guest_memory)) = boot_setup { diff --git a/arch/src/lib.rs b/arch/src/lib.rs index bbca3e4ea7..cbeb37f51d 100644 --- a/arch/src/lib.rs +++ b/arch/src/lib.rs @@ -123,7 +123,7 @@ fn pagesize() -> usize { pub struct NumaNode { pub memory_regions: Vec>, pub hotplug_regions: Vec>, - pub cpus: Vec, + pub cpus: Vec, pub pci_segments: Vec, pub distances: BTreeMap, pub memory_zones: Vec, diff --git a/arch/src/riscv64/mod.rs b/arch/src/riscv64/mod.rs index 128698961d..62554bd1a7 100644 --- a/arch/src/riscv64/mod.rs +++ b/arch/src/riscv64/mod.rs @@ -59,7 +59,7 @@ pub struct EntryPoint { /// Configure the specified VCPU, and return its MPIDR. 
pub fn configure_vcpu( vcpu: &Arc, - id: u8, + id: u32, boot_setup: Option<(EntryPoint, &GuestMemoryAtomic)>, ) -> super::Result<()> { if let Some((kernel_entry_point, _guest_memory)) = boot_setup { diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index baa984c94b..35ec4b9253 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -820,14 +820,14 @@ pub fn generate_common_cpuid( pub fn configure_vcpu( vcpu: &Arc, - id: u8, + id: u32, boot_setup: Option<(EntryPoint, &GuestMemoryAtomic)>, cpuid: Vec, kvm_hyperv: bool, cpu_vendor: CpuVendor, topology: Option<(u8, u8, u8)>, ) -> super::Result<()> { - let x2apic_id = get_x2apic_id(id as u32, topology); + let x2apic_id = get_x2apic_id(id, topology); // Per vCPU CPUID changes; common are handled via generate_common_cpuid() let mut cpuid = cpuid; @@ -946,7 +946,7 @@ pub fn configure_system( cmdline_addr: GuestAddress, cmdline_size: usize, initramfs: &Option, - _num_cpus: u8, + _num_cpus: u32, setup_header: Option, rsdp_addr: Option, sgx_epc_region: Option, @@ -1365,10 +1365,10 @@ fn update_cpuid_topology( cores_per_die: u8, dies_per_package: u8, cpu_vendor: CpuVendor, - id: u8, + id: u32, ) { let x2apic_id = get_x2apic_id( - id as u32, + id, Some((threads_per_core, cores_per_die, dies_per_package)), ); diff --git a/arch/src/x86_64/mptable.rs b/arch/src/x86_64/mptable.rs index aaf6f1ddd7..42667df9c2 100644 --- a/arch/src/x86_64/mptable.rs +++ b/arch/src/x86_64/mptable.rs @@ -121,7 +121,7 @@ fn mpf_intel_compute_checksum(v: &mpspec::mpf_intel) -> u8 { (!checksum).wrapping_add(1) } -fn compute_mp_size(num_cpus: u8) -> usize { +fn compute_mp_size(num_cpus: u32) -> usize { mem::size_of::() + mem::size_of::() + mem::size_of::() * (num_cpus as usize) @@ -135,12 +135,12 @@ fn compute_mp_size(num_cpus: u8) -> usize { pub fn setup_mptable( offset: GuestAddress, mem: &GuestMemoryMmap, - num_cpus: u8, + num_cpus: u32, topology: Option<(u8, u8, u8)>, ) -> Result<()> { if num_cpus > 0 { let cpu_id_max = num_cpus - 1; - let x2apic_id_max = get_x2apic_id(cpu_id_max.into(), topology); + let x2apic_id_max = get_x2apic_id(cpu_id_max, topology); if x2apic_id_max >= MAX_SUPPORTED_CPUS { return Err(Error::TooManyCpus); } @@ -195,7 +195,7 @@ pub fn setup_mptable( for cpu_id in 0..num_cpus { let mut mpc_cpu = MpcCpuWrapper(mpspec::mpc_cpu::default()); mpc_cpu.0.type_ = mpspec::MP_PROCESSOR as u8; - mpc_cpu.0.apicid = get_x2apic_id(cpu_id as u32, topology) as u8; + mpc_cpu.0.apicid = get_x2apic_id(cpu_id, topology) as u8; mpc_cpu.0.apicver = APIC_VERSION; mpc_cpu.0.cpuflag = mpspec::CPU_ENABLED as u8 | if cpu_id == 0 { @@ -392,13 +392,11 @@ mod tests { #[test] fn cpu_entry_count() { - let mem = GuestMemoryMmap::from_ranges(&[( - MPTABLE_START, - compute_mp_size(MAX_SUPPORTED_CPUS as u8), - )]) - .unwrap(); + let mem = + GuestMemoryMmap::from_ranges(&[(MPTABLE_START, compute_mp_size(MAX_SUPPORTED_CPUS))]) + .unwrap(); - for i in 0..MAX_SUPPORTED_CPUS as u8 { + for i in 0..MAX_SUPPORTED_CPUS { setup_mptable(MPTABLE_START, &mem, i, None).unwrap(); let mpf_intel: MpfIntelWrapper = mem.read_obj(MPTABLE_START).unwrap(); @@ -429,10 +427,9 @@ mod tests { #[test] fn cpu_entry_count_max() { let cpus = MAX_SUPPORTED_CPUS + 1; - let mem = - GuestMemoryMmap::from_ranges(&[(MPTABLE_START, compute_mp_size(cpus as u8))]).unwrap(); + let mem = GuestMemoryMmap::from_ranges(&[(MPTABLE_START, compute_mp_size(cpus))]).unwrap(); - let result = setup_mptable(MPTABLE_START, &mem, cpus as u8, None); + let result = setup_mptable(MPTABLE_START, &mem, cpus, None); 
result.unwrap_err(); } } diff --git a/devices/src/aia.rs b/devices/src/aia.rs index 83ed1585f4..f3956727ac 100644 --- a/devices/src/aia.rs +++ b/devices/src/aia.rs @@ -40,7 +40,7 @@ pub struct Aia { impl Aia { pub fn new( - vcpu_count: u8, + vcpu_count: u32, interrupt_manager: Arc>, vm: Arc, ) -> Result { diff --git a/devices/src/gic.rs b/devices/src/gic.rs index afa5814a16..dcae0be375 100644 --- a/devices/src/gic.rs +++ b/devices/src/gic.rs @@ -39,7 +39,7 @@ pub struct Gic { impl Gic { pub fn new( - vcpu_count: u8, + vcpu_count: u32, interrupt_manager: Arc>, vm: Arc, ) -> Result { diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index bb325964f1..f6692014dc 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -61,9 +61,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.95" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "arbitrary" @@ -93,7 +93,6 @@ dependencies = [ "uuid", "vm-fdt", "vm-memory", - "vm-migration", "vmm-sys-util", ] @@ -564,13 +563,13 @@ dependencies = [ [[package]] name = "landlock" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18738c5d4c7fae6727a96adb94722ef7ce82f3eafea0a11777e258a93816537e" +checksum = "b3d2ef408b88e913bfc6594f5e693d57676f6463ded7d8bf994175364320c706" dependencies = [ "enumflags2", "libc", - "thiserror 1.0.64", + "thiserror 2.0.12", ] [[package]] @@ -936,9 +935,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.9.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cecfa94848272156ea67b2b1a53f20fc7bc638c4a46d2f8abde08f05f4b857" +checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" dependencies = [ "serde", "serde_derive", @@ -947,9 +946,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.9.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8fee4991ef4f274617a51ad4af30519438dacb2f56ac773b08a1922ff743350" +checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" dependencies = [ "darling", "proc-macro2", @@ -1080,7 +1079,6 @@ name = "tpm" version = "0.1.0" dependencies = [ "anyhow", - "byteorder", "libc", "log", "net_gen", @@ -1190,20 +1188,17 @@ name = "virtio-devices" version = "0.1.0" dependencies = [ "anyhow", - "arc-swap", "block", "byteorder", "epoll", "event_monitor", "libc", "log", - "net_gen", "net_util", "pci", "rate_limiter", "seccompiler", "serde", - "serde_json", "serde_with", "serial_buffer", "thiserror 2.0.12", @@ -1243,12 +1238,10 @@ dependencies = [ name = "vm-device" version = "0.1.0" dependencies = [ - "anyhow", "hypervisor", "serde", "thiserror 2.0.12", "vfio-ioctls", - "vm-memory", "vmm-sys-util", ] @@ -1284,7 +1277,6 @@ dependencies = [ name = "vm-virtio" version = "0.1.0" dependencies = [ - "log", "virtio-queue", "vm-memory", ] @@ -1295,7 +1287,6 @@ version = "0.1.0" dependencies = [ "acpi_tables", "anyhow", - "arc-swap", "arch", "bitflags 2.9.0", "block", @@ -1329,7 +1320,6 @@ dependencies = [ "vfio_user", "virtio-bindings", "virtio-devices", - "virtio-queue", "vm-allocator", "vm-device", "vm-memory", diff --git a/fuzz/fuzz_targets/http_api.rs b/fuzz/fuzz_targets/http_api.rs index 8f41903f6f..5c146ad9d8 100644 --- a/fuzz/fuzz_targets/http_api.rs +++ 
b/fuzz/fuzz_targets/http_api.rs @@ -221,7 +221,7 @@ impl RequestHandler for StubApiRequestHandler { Ok(()) } - fn vm_resize(&mut self, _: Option, _: Option, _: Option) -> Result<(), VmError> { + fn vm_resize(&mut self, _: Option, _: Option, _: Option) -> Result<(), VmError> { Ok(()) } diff --git a/hypervisor/src/cpu.rs b/hypervisor/src/cpu.rs index c2eb03a267..46401cc66c 100644 --- a/hypervisor/src/cpu.rs +++ b/hypervisor/src/cpu.rs @@ -471,7 +471,7 @@ pub trait Vcpu: Send + Sync { &self, vm: &Arc, kvi: &mut VcpuInit, - id: u8, + id: u32, ) -> Result<()>; /// /// Returns VcpuInit with default value set @@ -498,7 +498,7 @@ pub trait Vcpu: Send + Sync { /// Configure core registers for a given CPU. /// #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] - fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> Result<()>; + fn setup_regs(&self, cpu_id: u32, boot_ip: u64, fdt_start: u64) -> Result<()>; /// /// Check if the CPU supports PMU /// diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 8b8351b6de..f92720779e 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -559,7 +559,7 @@ impl vm::Vm for KvmVm { /// fn create_vcpu( &self, - id: u8, + id: u32, vm_ops: Option>, ) -> vm::Result> { let fd = self @@ -2150,7 +2150,7 @@ impl cpu::Vcpu for KvmVcpu { &self, vm: &Arc, kvi: &mut crate::VcpuInit, - id: u8, + id: u32, ) -> cpu::Result<()> { use std::arch::is_aarch64_feature_detected; #[allow(clippy::nonminimal_bool)] @@ -2280,7 +2280,7 @@ impl cpu::Vcpu for KvmVcpu { /// Configure core registers for a given CPU. /// #[cfg(target_arch = "aarch64")] - fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { + fn setup_regs(&self, cpu_id: u32, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { // Get the register index of the PSTATE (Processor State) register. let pstate = offset_of!(kvm_regs, regs.pstate); self.fd @@ -2326,7 +2326,7 @@ impl cpu::Vcpu for KvmVcpu { /// /// Configure registers for a given RISC-V CPU. /// - fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { + fn setup_regs(&self, cpu_id: u32, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { // Setting the A0 () to the hartid of this CPU. let a0 = offset_of!(kvm_riscv_core, regs.a0); self.fd diff --git a/hypervisor/src/mshv/mod.rs b/hypervisor/src/mshv/mod.rs index 7783a2fc18..8b331cf0a8 100644 --- a/hypervisor/src/mshv/mod.rs +++ b/hypervisor/src/mshv/mod.rs @@ -1262,7 +1262,7 @@ impl cpu::Vcpu for MshvVcpu { } #[cfg(target_arch = "aarch64")] - fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { + fn setup_regs(&self, cpu_id: u32, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { let arr_reg_name_value = [( hv_register_name_HV_ARM64_REGISTER_PSTATE, regs::PSTATE_FAULT_BITS_64, @@ -1324,7 +1324,7 @@ impl cpu::Vcpu for MshvVcpu { &self, _vm: &Arc, _kvi: &mut crate::VcpuInit, - _id: u8, + _id: u32, ) -> cpu::Result<()> { Ok(()) } @@ -1834,9 +1834,10 @@ impl vm::Vm for MshvVm { /// fn create_vcpu( &self, - id: u8, + id: u32, vm_ops: Option>, ) -> vm::Result> { + let id: u8 = id.try_into().unwrap(); let vcpu_fd = self .fd .create_vcpu(id) diff --git a/hypervisor/src/vm.rs b/hypervisor/src/vm.rs index 306aed0ff8..25a80666ae 100644 --- a/hypervisor/src/vm.rs +++ b/hypervisor/src/vm.rs @@ -319,7 +319,7 @@ pub trait Vm: Send + Sync + Any { /// Unregister an event that will, when signaled, trigger the `gsi` IRQ. 
fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> Result<()>; /// Creates a new KVM vCPU file descriptor and maps the memory corresponding - fn create_vcpu(&self, id: u8, vm_ops: Option>) -> Result>; + fn create_vcpu(&self, id: u32, vm_ops: Option>) -> Result>; #[cfg(target_arch = "aarch64")] fn create_vgic(&self, config: VgicConfig) -> Result>>; #[cfg(target_arch = "riscv64")] diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs index 4278af6175..904565e5cd 100644 --- a/src/bin/ch-remote.rs +++ b/src/bin/ch-remote.rs @@ -725,7 +725,7 @@ fn resize_config( memory: Option<&str>, balloon: Option<&str>, ) -> Result { - let desired_vcpus: Option = if let Some(cpus) = cpus { + let desired_vcpus: Option = if let Some(cpus) = cpus { Some(cpus.parse().map_err(Error::InvalidCpuCount)?) } else { None diff --git a/vmm/src/acpi.rs b/vmm/src/acpi.rs index 8dc2c3276c..b9e809e4a1 100644 --- a/vmm/src/acpi.rs +++ b/vmm/src/acpi.rs @@ -326,9 +326,9 @@ fn create_srat_table( for cpu in &node.cpus { #[cfg(target_arch = "x86_64")] - let x2apic_id = arch::x86_64::get_x2apic_id(*cpu as u32, topology); + let x2apic_id = arch::x86_64::get_x2apic_id(*cpu, topology); #[cfg(target_arch = "aarch64")] - let x2apic_id = *cpu as u32; + let x2apic_id = *cpu; // Flags // - Enabled = 1 (bit 0) diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index 95c4019b48..a856f49f55 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -217,7 +217,7 @@ pub struct VmmPingResponse { #[derive(Clone, Deserialize, Serialize, Default, Debug)] pub struct VmResizeData { - pub desired_vcpus: Option, + pub desired_vcpus: Option, pub desired_ram: Option, pub desired_balloon: Option, } @@ -307,7 +307,7 @@ pub trait RequestHandler { fn vm_resize( &mut self, - desired_vcpus: Option, + desired_vcpus: Option, desired_ram: Option, desired_balloon: Option, ) -> Result<(), VmError>; diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index d8278067a5..e5813562b1 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -338,7 +338,7 @@ macro_rules! round_up { pub struct Vcpu { // The hypervisor abstracted CPU. vcpu: Arc, - id: u8, + id: u32, #[cfg(target_arch = "aarch64")] mpidr: u64, saved_state: Option, @@ -356,8 +356,8 @@ impl Vcpu { /// * `vm_ops` - Optional object for exit handling. 
/// * `cpu_vendor` - CPU vendor as reported by __cpuid(0x0) pub fn new( - id: u8, - apic_id: u8, + id: u32, + apic_id: u32, vm: &Arc, vm_ops: Option>, #[cfg(target_arch = "x86_64")] cpu_vendor: CpuVendor, @@ -532,8 +532,8 @@ pub struct CpuManager { vm_ops: Arc, #[cfg_attr(target_arch = "aarch64", allow(dead_code))] acpi_address: Option, - proximity_domain_per_cpu: BTreeMap, - affinity: BTreeMap>, + proximity_domain_per_cpu: BTreeMap, + affinity: BTreeMap>, dynamic: bool, hypervisor: Arc, #[cfg(feature = "sev_snp")] @@ -558,7 +558,7 @@ impl BusDevice for CpuManager { data[0] = self.selected_cpu; } CPU_STATUS_OFFSET => { - if self.selected_cpu < self.max_vcpus() { + if (self.selected_cpu as u32) < self.max_vcpus() { let state = &self.vcpu_states[usize::from(self.selected_cpu)]; if state.active() { data[0] |= 1 << CPU_ENABLE_FLAG; @@ -588,7 +588,7 @@ impl BusDevice for CpuManager { self.selected_cpu = data[0]; } CPU_STATUS_OFFSET => { - if self.selected_cpu < self.max_vcpus() { + if (self.selected_cpu as u32) < self.max_vcpus() { let state = &mut self.vcpu_states[usize::from(self.selected_cpu)]; // The ACPI code writes back a 1 to acknowledge the insertion if (data[0] & (1 << CPU_INSERTING_FLAG) == 1 << CPU_INSERTING_FLAG) @@ -604,7 +604,7 @@ impl BusDevice for CpuManager { } // Trigger removal of vCPU if data[0] & (1 << CPU_EJECT_FLAG) == 1 << CPU_EJECT_FLAG { - if let Err(e) = self.remove_vcpu(self.selected_cpu) { + if let Err(e) = self.remove_vcpu(self.selected_cpu as u32) { error!("Error removing vCPU: {:?}", e); } } @@ -730,7 +730,7 @@ impl CpuManager { } } - let proximity_domain_per_cpu: BTreeMap = { + let proximity_domain_per_cpu: BTreeMap = { let mut cpu_list = Vec::new(); for (proximity_domain, numa_node) in numa_nodes.iter() { for cpu in numa_node.cpus.iter() { @@ -745,7 +745,7 @@ impl CpuManager { let affinity = if let Some(cpu_affinity) = config.affinity.as_ref() { cpu_affinity .iter() - .map(|a| (a.vcpu, a.host_cpus.clone())) + .map(|a| (a.vcpu as u32, a.host_cpus.clone())) .collect() } else { BTreeMap::new() @@ -817,19 +817,19 @@ impl CpuManager { Ok(()) } - fn create_vcpu(&mut self, cpu_id: u8, snapshot: Option) -> Result>> { + fn create_vcpu(&mut self, cpu_id: u32, snapshot: Option) -> Result>> { info!("Creating vCPU: cpu_id = {}", cpu_id); #[cfg(target_arch = "x86_64")] let topology = self.get_vcpu_topology(); #[cfg(target_arch = "x86_64")] - let x2apic_id = arch::x86_64::get_x2apic_id(cpu_id as u32, topology); + let x2apic_id = arch::x86_64::get_x2apic_id(cpu_id, topology); #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] - let x2apic_id = cpu_id as u32; + let x2apic_id = cpu_id; let mut vcpu = Vcpu::new( cpu_id, - x2apic_id as u8, + x2apic_id, &self.vm, Some(self.vm_ops.clone()), #[cfg(target_arch = "x86_64")] @@ -884,7 +884,7 @@ impl CpuManager { #[cfg(target_arch = "x86_64")] let topology = self.config.topology.clone().map_or_else( - || Some((1, self.boot_vcpus(), 1)), + || Some((1, self.boot_vcpus().try_into().unwrap(), 1)), |t| Some((t.threads_per_core, t.cores_per_die, t.dies_per_package)), ); #[cfg(target_arch = "x86_64")] @@ -907,7 +907,7 @@ impl CpuManager { /// Only create new vCPUs if there aren't any inactive ones to reuse fn create_vcpus( &mut self, - desired_vcpus: u8, + desired_vcpus: u32, snapshot: Option, ) -> Result>>> { let mut vcpus: Vec>> = vec![]; @@ -919,12 +919,12 @@ impl CpuManager { self.present_vcpus() ); - if desired_vcpus > self.config.max_vcpus { + if desired_vcpus > self.config.max_vcpus as u32 { return 
Err(Error::DesiredVCpuCountExceedsMax); } // Only create vCPUs in excess of all the allocated vCPUs. - for cpu_id in self.vcpus.len() as u8..desired_vcpus { + for cpu_id in self.vcpus.len() as u32..desired_vcpus { vcpus.push(self.create_vcpu( cpu_id, // TODO: The special format of the CPU id can be removed once @@ -962,7 +962,7 @@ impl CpuManager { fn start_vcpu( &mut self, vcpu: Arc>, - vcpu_id: u8, + vcpu_id: u32, vcpu_thread_barrier: Arc, inserting: bool, ) -> Result<()> { @@ -977,12 +977,16 @@ impl CpuManager { let vcpu_pause_signalled = self.vcpus_pause_signalled.clone(); let vcpu_kick_signalled = self.vcpus_kick_signalled.clone(); - let vcpu_kill = self.vcpu_states[usize::from(vcpu_id)].kill.clone(); - let vcpu_run_interrupted = self.vcpu_states[usize::from(vcpu_id)] + let vcpu_kill = self.vcpu_states[usize::try_from(vcpu_id).unwrap()] + .kill + .clone(); + let vcpu_run_interrupted = self.vcpu_states[usize::try_from(vcpu_id).unwrap()] .vcpu_run_interrupted .clone(); let panic_vcpu_run_interrupted = vcpu_run_interrupted.clone(); - let vcpu_paused = self.vcpu_states[usize::from(vcpu_id)].paused.clone(); + let vcpu_paused = self.vcpu_states[usize::try_from(vcpu_id).unwrap()] + .paused + .clone(); // Prepare the CPU set the current vCPU is expected to run onto. let cpuset = self.affinity.get(&vcpu_id).map(|host_cpus| { @@ -1213,8 +1217,8 @@ impl CpuManager { // On hot plug calls into this function entry_point is None. It is for // those hotplug CPU additions that we need to set the inserting flag. - self.vcpu_states[usize::from(vcpu_id)].handle = handle; - self.vcpu_states[usize::from(vcpu_id)].inserting = inserting; + self.vcpu_states[usize::try_from(vcpu_id).unwrap()].handle = handle; + self.vcpu_states[usize::try_from(vcpu_id).unwrap()].inserting = inserting; Ok(()) } @@ -1222,11 +1226,11 @@ impl CpuManager { /// Start up as many vCPUs threads as needed to reach `desired_vcpus` fn activate_vcpus( &mut self, - desired_vcpus: u8, + desired_vcpus: u32, inserting: bool, paused: Option, ) -> Result<()> { - if desired_vcpus > self.config.max_vcpus { + if desired_vcpus > self.config.max_vcpus as u32 { return Err(Error::DesiredVCpuCountExceedsMax); } @@ -1257,11 +1261,11 @@ impl CpuManager { Ok(()) } - fn mark_vcpus_for_removal(&mut self, desired_vcpus: u8) { + fn mark_vcpus_for_removal(&mut self, desired_vcpus: u32) { // Mark vCPUs for removal, actual removal happens on ejection for cpu_id in desired_vcpus..self.present_vcpus() { - self.vcpu_states[usize::from(cpu_id)].removing = true; - self.vcpu_states[usize::from(cpu_id)] + self.vcpu_states[usize::try_from(cpu_id).unwrap()].removing = true; + self.vcpu_states[usize::try_from(cpu_id).unwrap()] .pending_removal .store(true, Ordering::SeqCst); } @@ -1276,9 +1280,9 @@ impl CpuManager { false } - fn remove_vcpu(&mut self, cpu_id: u8) -> Result<()> { + fn remove_vcpu(&mut self, cpu_id: u32) -> Result<()> { info!("Removing vCPU: cpu_id = {}", cpu_id); - let state = &mut self.vcpu_states[usize::from(cpu_id)]; + let state = &mut self.vcpu_states[usize::try_from(cpu_id).unwrap()]; state.kill.store(true, Ordering::SeqCst); state.signal_thread(); state.join_thread()?; @@ -1306,7 +1310,7 @@ impl CpuManager { } pub fn start_restored_vcpus(&mut self) -> Result<()> { - self.activate_vcpus(self.vcpus.len() as u8, false, Some(true)) + self.activate_vcpus(self.vcpus.len() as u32, false, Some(true)) .map_err(|e| { Error::StartRestoreVcpu(anyhow!("Failed to start restored vCPUs: {:#?}", e)) })?; @@ -1314,7 +1318,7 @@ impl CpuManager { Ok(()) } - pub fn 
resize(&mut self, desired_vcpus: u8) -> Result { + pub fn resize(&mut self, desired_vcpus: u32) -> Result { if desired_vcpus.cmp(&self.present_vcpus()) == cmp::Ordering::Equal { return Ok(false); } @@ -1387,12 +1391,12 @@ impl CpuManager { Ok(()) } - pub fn boot_vcpus(&self) -> u8 { - self.config.boot_vcpus + pub fn boot_vcpus(&self) -> u32 { + self.config.boot_vcpus as u32 } - pub fn max_vcpus(&self) -> u8 { - self.config.max_vcpus + pub fn max_vcpus(&self) -> u32 { + self.config.max_vcpus as u32 } #[cfg(target_arch = "x86_64")] @@ -1401,10 +1405,10 @@ impl CpuManager { self.cpuid.clone() } - fn present_vcpus(&self) -> u8 { + fn present_vcpus(&self) -> u32 { self.vcpu_states .iter() - .fold(0, |acc, state| acc + state.active() as u8) + .fold(0, |acc, state| acc + state.active() as u32) } #[cfg(target_arch = "aarch64")] @@ -1441,15 +1445,15 @@ impl CpuManager { { madt.write(36, arch::layout::APIC_START.0); - for cpu in 0..self.config.max_vcpus { - let x2apic_id = get_x2apic_id(cpu.into(), self.get_vcpu_topology()); + for cpu in 0..self.config.max_vcpus as u32 { + let x2apic_id = get_x2apic_id(cpu, self.get_vcpu_topology()); let lapic = LocalX2Apic { r#type: acpi::ACPI_X2APIC_PROCESSOR, length: 16, - processor_id: cpu.into(), + processor_id: cpu, apic_id: x2apic_id, - flags: if cpu < self.config.boot_vcpus { + flags: if cpu < self.config.boot_vcpus as u32 { 1 << MADT_CPU_ENABLE_FLAG } else { 0 @@ -1571,7 +1575,8 @@ impl CpuManager { // 1 package, multiple cores, 1 thread per core // This is also the behavior when PPTT is missing. let (threads_per_core, cores_per_package, packages) = - self.get_vcpu_topology().unwrap_or((1, self.max_vcpus(), 1)); + self.get_vcpu_topology() + .unwrap_or((1, self.max_vcpus().try_into().unwrap(), 1)); let mut pptt = Sdt::new(*b"PPTT", 36, 2, *b"CLOUDH", *b"CHPPTT ", 1); @@ -1922,7 +1927,7 @@ impl CpuManager { } struct Cpu { - cpu_id: u8, + cpu_id: u32, proximity_domain: u32, dynamic: bool, #[cfg(target_arch = "x86_64")] @@ -1938,12 +1943,12 @@ const MADT_CPU_ONLINE_CAPABLE_FLAG: usize = 1; impl Cpu { #[cfg(target_arch = "x86_64")] fn generate_mat(&self) -> Vec { - let x2apic_id = arch::x86_64::get_x2apic_id(self.cpu_id.into(), self.topology); + let x2apic_id = arch::x86_64::get_x2apic_id(self.cpu_id, self.topology); let lapic = LocalX2Apic { r#type: crate::acpi::ACPI_X2APIC_PROCESSOR, length: 16, - processor_id: self.cpu_id.into(), + processor_id: self.cpu_id, apic_id: x2apic_id, flags: 1 << MADT_CPU_ENABLE_FLAG, _reserved: 0, @@ -2045,7 +2050,7 @@ impl Aml for Cpu { } struct CpuNotify { - cpu_id: u8, + cpu_id: u32, } impl Aml for CpuNotify { @@ -2060,7 +2065,7 @@ impl Aml for CpuNotify { } struct CpuMethods { - max_vcpus: u8, + max_vcpus: u32, dynamic: bool, } @@ -2098,7 +2103,7 @@ impl Aml for CpuMethods { let mut cpu_notifies_refs: Vec<&dyn Aml> = Vec::new(); for cpu_id in 0..self.max_vcpus { - cpu_notifies_refs.push(&cpu_notifies[usize::from(cpu_id)]); + cpu_notifies_refs.push(&cpu_notifies[usize::try_from(cpu_id).unwrap()]); } aml::Method::new("CTFY".into(), 2, true, cpu_notifies_refs).to_aml_bytes(sink); @@ -2242,7 +2247,7 @@ impl Aml for CpuManager { let uid = aml::Name::new("_CID".into(), &aml::EISAName::new("PNP0A05")); // Bundle methods together under a common object let methods = CpuMethods { - max_vcpus: self.config.max_vcpus, + max_vcpus: self.config.max_vcpus as u32, dynamic: self.dynamic, }; let mut cpu_data_inner: Vec<&dyn Aml> = vec![&hid, &uid, &methods]; @@ -2250,7 +2255,7 @@ impl Aml for CpuManager { #[cfg(target_arch = "x86_64")] let 
topology = self.get_vcpu_topology(); let mut cpu_devices = Vec::new(); - for cpu_id in 0..self.config.max_vcpus { + for cpu_id in 0..(self.config.max_vcpus as u32) { let proximity_domain = *self.proximity_domain_per_cpu.get(&cpu_id).unwrap_or(&0); let cpu_device = Cpu { cpu_id, @@ -2624,7 +2629,7 @@ impl CpuElf64Writable for CpuManager { pos += descsz - size_of::() - size_of::(); let orig_rax: u64 = 0; - let gregs = self.vcpus[usize::from(vcpu_id)] + let gregs = self.vcpus[usize::try_from(vcpu_id).unwrap()] .lock() .unwrap() .vcpu @@ -2652,7 +2657,7 @@ impl CpuElf64Writable for CpuManager { orig_rax, ]; - let sregs = self.vcpus[usize::from(vcpu_id)] + let sregs = self.vcpus[usize::try_from(vcpu_id).unwrap()] .lock() .unwrap() .vcpu @@ -2726,7 +2731,7 @@ impl CpuElf64Writable for CpuManager { pos += round_up!(COREDUMP_NAME_SIZE as usize, 4); - let gregs = self.vcpus[usize::from(vcpu_id)] + let gregs = self.vcpus[usize::try_from(vcpu_id).unwrap()] .lock() .unwrap() .vcpu @@ -2755,7 +2760,7 @@ impl CpuElf64Writable for CpuManager { gregs.get_r15(), ]; - let sregs = self.vcpus[usize::from(vcpu_id)] + let sregs = self.vcpus[usize::try_from(vcpu_id).unwrap()] .lock() .unwrap() .vcpu diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 53e2d1a4ac..f43c50b11f 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -1672,7 +1672,7 @@ impl DeviceManager { ) -> DeviceManagerResult>> { let interrupt_controller: Arc> = Arc::new(Mutex::new( gic::Gic::new( - self.config.lock().unwrap().cpus.boot_vcpus, + self.config.lock().unwrap().cpus.boot_vcpus as u32, Arc::clone(&self.msi_interrupt_manager), self.address_manager.vm.clone(), ) @@ -1725,7 +1725,7 @@ impl DeviceManager { ) -> DeviceManagerResult>> { let interrupt_controller: Arc> = Arc::new(Mutex::new( aia::Aia::new( - self.config.lock().unwrap().cpus.boot_vcpus, + self.config.lock().unwrap().cpus.boot_vcpus as u32, Arc::clone(&self.msi_interrupt_manager), self.address_manager.vm.clone(), ) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index e51573159b..f17c4b79d0 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -1808,7 +1808,7 @@ impl RequestHandler for Vmm { fn vm_resize( &mut self, - desired_vcpus: Option, + desired_vcpus: Option, desired_ram: Option, desired_balloon: Option, ) -> result::Result<(), VmError> { @@ -1824,7 +1824,7 @@ impl RequestHandler for Vmm { } else { let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); if let Some(desired_vcpus) = desired_vcpus { - config.cpus.boot_vcpus = desired_vcpus; + config.cpus.boot_vcpus = desired_vcpus.try_into().unwrap(); } if let Some(desired_ram) = desired_ram { config.memory.size = desired_ram; diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 4935bc2e9f..40579b9ccb 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -621,7 +621,7 @@ impl Vm { #[cfg(feature = "tdx")] if tdx_enabled { let cpuid = cpu_manager.lock().unwrap().common_cpuid(); - let max_vcpus = cpu_manager.lock().unwrap().max_vcpus() as u32; + let max_vcpus = cpu_manager.lock().unwrap().max_vcpus(); vm.tdx_init(&cpuid, max_vcpus) .map_err(Error::InitializeTdxVm)?; } @@ -948,7 +948,7 @@ impl Vm { } if let Some(cpus) = &config.cpus { - node.cpus.extend(cpus); + node.cpus.extend(cpus.iter().map(|cpu| *cpu as u32)); } if let Some(pci_segments) = &config.pci_segments { @@ -1719,7 +1719,7 @@ impl Vm { pub fn resize( &mut self, - desired_vcpus: Option, + desired_vcpus: Option, desired_memory: Option, desired_balloon: Option, ) -> Result<()> { @@ -1739,7 +1739,7 @@ impl Vm { 
.notify_hotplug(AcpiNotificationFlags::CPU_DEVICES_CHANGED) .map_err(Error::DeviceManager)?; } - self.config.lock().unwrap().cpus.boot_vcpus = desired_vcpus; + self.config.lock().unwrap().cpus.boot_vcpus = desired_vcpus.try_into().unwrap(); } if let Some(desired_memory) = desired_memory { From 35121c44db2ae14c2e306018eaacc6367d6362fc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Aug 2025 06:55:55 +0000 Subject: [PATCH 086/294] build: Bump slab from 0.4.9 to 0.4.11 Bumps [slab](https://github.com/tokio-rs/slab) from 0.4.9 to 0.4.11. - [Release notes](https://github.com/tokio-rs/slab/releases) - [Changelog](https://github.com/tokio-rs/slab/blob/master/CHANGELOG.md) - [Commits](https://github.com/tokio-rs/slab/compare/v0.4.9...v0.4.11) --- updated-dependencies: - dependency-name: slab dependency-version: 0.4.11 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a78ae54c2..e55ceabf94 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1977,12 +1977,9 @@ dependencies = [ [[package]] name = "slab" -version = "0.4.9" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" [[package]] name = "smallvec" From d70d132a5c5fd86a5a77663524575eb75adcdd81 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Aug 2025 10:34:39 +0000 Subject: [PATCH 087/294] build: Bump errno from 0.3.12 to 0.3.13 Bumps [errno](https://github.com/lambda-fairy/rust-errno) from 0.3.12 to 0.3.13. - [Release notes](https://github.com/lambda-fairy/rust-errno/releases) - [Changelog](https://github.com/lambda-fairy/rust-errno/blob/main/CHANGELOG.md) - [Commits](https://github.com/lambda-fairy/rust-errno/compare/v0.3.12...v0.3.13) --- updated-dependencies: - dependency-name: errno dependency-version: 0.3.13 dependency-type: indirect update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e55ceabf94..63204332ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -652,12 +652,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.12" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -1838,7 +1838,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -1851,7 +1851,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.9.4", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] From 458721629c981d21abe38d7eb707f9fb7190a2df Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Aug 2025 17:16:24 +0000 Subject: [PATCH 088/294] build: Bump zvariant from 5.5.3 to 5.6.0 Bumps [zvariant](https://github.com/dbus2/zbus) from 5.5.3 to 5.6.0. - [Release notes](https://github.com/dbus2/zbus/releases) - [Commits](https://github.com/dbus2/zbus/compare/zvariant-5.5.3...zvariant-5.6.0) --- updated-dependencies: - dependency-name: zvariant dependency-version: 5.6.0 dependency-type: indirect update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 63204332ba..110c9a5919 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2957,9 +2957,9 @@ dependencies = [ [[package]] name = "zvariant" -version = "5.5.3" +version = "5.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d30786f75e393ee63a21de4f9074d4c038d52c5b1bb4471f955db249f9dffb1" +checksum = "d91b3680bb339216abd84714172b5138a4edac677e641ef17e1d8cb1b3ca6e6f" dependencies = [ "endi", "enumflags2", @@ -2971,9 +2971,9 @@ dependencies = [ [[package]] name = "zvariant_derive" -version = "5.5.3" +version = "5.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75fda702cd42d735ccd48117b1630432219c0e9616bf6cb0f8350844ee4d9580" +checksum = "3a8c68501be459a8dbfffbe5d792acdd23b4959940fc87785fb013b32edbc208" dependencies = [ "proc-macro-crate", "proc-macro2", From 9283f87f907bbd4483fc794ecd75c7702f67db1b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Aug 2025 21:19:53 +0000 Subject: [PATCH 089/294] build: Bump igvm from `01daa63` to `dff4ebc` Bumps [igvm](https://github.com/microsoft/igvm) from `01daa63` to `dff4ebc`. - [Release notes](https://github.com/microsoft/igvm/releases) - [Commits](https://github.com/microsoft/igvm/compare/01daa631a596459cb4de58505881007dd13d4410...dff4ebc9c5bd16707ff75de26ccabe2d4dfdbcd8) --- updated-dependencies: - dependency-name: igvm dependency-version: dff4ebc9c5bd16707ff75de26ccabe2d4dfdbcd8 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 110c9a5919..606be04202 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -950,8 +950,8 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "igvm" -version = "0.3.4" -source = "git+https://github.com/microsoft/igvm?branch=main#01daa631a596459cb4de58505881007dd13d4410" +version = "0.4.0" +source = "git+https://github.com/microsoft/igvm?branch=main#dff4ebc9c5bd16707ff75de26ccabe2d4dfdbcd8" dependencies = [ "bitfield-struct", "crc32fast", @@ -967,8 +967,8 @@ dependencies = [ [[package]] name = "igvm_defs" -version = "0.3.4" -source = "git+https://github.com/microsoft/igvm?branch=main#01daa631a596459cb4de58505881007dd13d4410" +version = "0.4.0" +source = "git+https://github.com/microsoft/igvm?branch=main#dff4ebc9c5bd16707ff75de26ccabe2d4dfdbcd8" dependencies = [ "bitfield-struct", "open-enum", From 364a0972f04703731a1e04f5e8ed15ca3256afac Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Aug 2025 23:09:12 +0000 Subject: [PATCH 090/294] build: Bump rustversion from 1.0.21 to 1.0.22 Bumps [rustversion](https://github.com/dtolnay/rustversion) from 1.0.21 to 1.0.22. - [Release notes](https://github.com/dtolnay/rustversion/releases) - [Commits](https://github.com/dtolnay/rustversion/compare/1.0.21...1.0.22) --- updated-dependencies: - dependency-name: rustversion dependency-version: 1.0.22 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 606be04202..cce1452830 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1856,9 +1856,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.21" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" From 6e0403a959092495cda632a8df2bea66bebd9237 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Tue, 12 Aug 2025 22:30:37 +0000 Subject: [PATCH 091/294] misc: make topology a 4-tuple of u16s This is the second patch in a series intended to let Cloud Hypervisor support more than 255 vCPUs in guest VMs; the first patch/commit is https://github.com/cloud-hypervisor/cloud-hypervisor/pull/7231 At the moment, CPU topology in Cloud Hypervisor is using u8 for components, and somewhat inconsistently: - struct CpuTopology in vmm/src/vm_config.rs uses four components (threads_per_core, cores_per_die, dies_per_package, packages); - when passed around as a tuple, it is a 3-tuple of u8, with some inconsistency: - in get_x2apic_id in arch/src/x86_64/mod.rs the three u8 are assumed to be (correctly) threads_per_core, cores_per_die, and dies_per_package, but - in get_vcpu_topology() in vmm/src/cpu.rs the three-tuple is threads_per_core, cores_per_die, and packages (dies_per_package is assumed to always be one? not clear). So for consistency, a 4-tuple is always passed around. In addition, the types of the tuple components is changed from u8 to u16, as on x86_64 subcomponents can consume up to 16 bits. Again, config constraints have not been changed, so this patch is mostly NOOP. 
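For reference, a self-contained sketch of how an x2APIC ID can be derived from such a (threads_per_core, cores_per_die, dies_per_package, packages) tuple. The mask-width arithmetic mirrors the get_x2apic_id() change below; the composition of the die and package bits is a reconstruction (not copied from this patch) chosen to agree with the unit tests it adds; the u16 widths reflect that each topology subcomponent can consume up to 16 bits on x86_64.

// Sketch only: mask widths follow the get_x2apic_id() hunk below; the
// die/package composition is a reconstruction consistent with the new tests.
fn x2apic_id(cpu_id: u32, t: (u16, u16, u16, u16)) -> u32 {
    let (threads_per_core, cores_per_die, dies_per_package, _packages) = t;
    // Each field is as wide as needed to encode the largest index at that
    // level (0 bits when the level has a single entry).
    let thread_width = u16::BITS - (threads_per_core - 1).leading_zeros();
    let core_width = u16::BITS - (cores_per_die - 1).leading_zeros();
    let die_width = u16::BITS - (dies_per_package - 1).leading_zeros();

    let thread_id = cpu_id % threads_per_core as u32;
    let core_id = (cpu_id / threads_per_core as u32) % cores_per_die as u32;
    let die_id = (cpu_id / (threads_per_core as u32 * cores_per_die as u32))
        % dies_per_package as u32;
    let package_id =
        cpu_id / (threads_per_core as u32 * cores_per_die as u32 * dies_per_package as u32);

    thread_id
        | (core_id << thread_width)
        | (die_id << (thread_width + core_width))
        | (package_id << (thread_width + core_width + die_width))
}

fn main() {
    // Values taken from the tests added to arch/src/x86_64/mod.rs below.
    assert_eq!(x2apic_id(6, (2, 3, 1, 1)), 8);
    assert_eq!(x2apic_id(8, (2, 3, 1, 1)), 10);
    assert_eq!(x2apic_id(257, (1, 312, 1, 1)), 257);
    println!("ok");
}

The sketch prints "ok" when the reconstructed IDs match the values asserted by the new tests.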
Signed-off-by: Barret Rhoden Signed-off-by: Neel Natu Signed-off-by: Ofir Weisse Signed-off-by: Peter Oskolkov --- arch/src/aarch64/fdt.rs | 14 ++++++---- arch/src/aarch64/mod.rs | 2 +- arch/src/x86_64/mod.rs | 57 +++++++++++++++++++++++++------------- arch/src/x86_64/mptable.rs | 2 +- vmm/src/acpi.rs | 2 +- vmm/src/cpu.rs | 43 ++++++++++++++++++++-------- vmm/src/vm.rs | 2 +- 7 files changed, 81 insertions(+), 41 deletions(-) diff --git a/arch/src/aarch64/fdt.rs b/arch/src/aarch64/fdt.rs index 238ad0b32a..aefecf8b2d 100644 --- a/arch/src/aarch64/fdt.rs +++ b/arch/src/aarch64/fdt.rs @@ -218,7 +218,7 @@ pub fn create_fdt, - vcpu_topology: Option<(u8, u8, u8)>, + vcpu_topology: Option<(u16, u16, u16, u16)>, device_info: &HashMap<(DeviceType, String), T, S>, gic_device: &Arc>, initrd: &Option, @@ -280,7 +280,7 @@ pub fn write_fdt_to_memory(fdt_final: Vec, guest_mem: &GuestMemoryMmap) -> R fn create_cpu_nodes( fdt: &mut FdtWriter, vcpu_mpidr: &[u64], - vcpu_topology: Option<(u8, u8, u8)>, + vcpu_topology: Option<(u16, u16, u16, u16)>, numa_nodes: &NumaNodes, ) -> FdtWriterResult<()> { // See https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/arm/cpus.yaml. @@ -289,8 +289,11 @@ fn create_cpu_nodes( fdt.property_u32("#size-cells", 0x0)?; let num_cpus = vcpu_mpidr.len(); - let (threads_per_core, cores_per_package, packages) = vcpu_topology.unwrap_or((1, 1, 1)); - let max_cpus: u32 = (threads_per_core * cores_per_package * packages).into(); + let (threads_per_core, cores_per_die, dies_per_package, packages) = + vcpu_topology.unwrap_or((1, 1, 1, 1)); + let cores_per_package = cores_per_die * dies_per_package; + let max_cpus: u32 = + threads_per_core as u32 * cores_per_die as u32 * dies_per_package as u32 * packages as u32; // Add cache info. // L1 Data Cache Info. @@ -462,7 +465,8 @@ fn create_cpu_nodes( } if let Some(topology) = vcpu_topology { - let (threads_per_core, cores_per_package, packages) = topology; + let (threads_per_core, cores_per_die, dies_per_package, packages) = topology; + let cores_per_package = cores_per_die * dies_per_package; let cpu_map_node = fdt.begin_node("cpu-map")?; // Create device tree nodes with regard of above mapping. 
diff --git a/arch/src/aarch64/mod.rs b/arch/src/aarch64/mod.rs index c807429146..f7a6c36539 100644 --- a/arch/src/aarch64/mod.rs +++ b/arch/src/aarch64/mod.rs @@ -126,7 +126,7 @@ pub fn configure_system, - vcpu_topology: Option<(u8, u8, u8)>, + vcpu_topology: Option<(u16, u16, u16, u16)>, device_info: &HashMap<(DeviceType, String), T, S>, initrd: &Option, pci_space_info: &[PciSpaceInfo], diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index 35ec4b9253..6dcac04f1b 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -209,11 +209,11 @@ pub enum Error { E820Configuration, } -pub fn get_x2apic_id(cpu_id: u32, topology: Option<(u8, u8, u8)>) -> u32 { +pub fn get_x2apic_id(cpu_id: u32, topology: Option<(u16, u16, u16, u16)>) -> u32 { if let Some(t) = topology { - let thread_mask_width = u8::BITS - (t.0 - 1).leading_zeros(); - let core_mask_width = u8::BITS - (t.1 - 1).leading_zeros(); - let die_mask_width = u8::BITS - (t.2 - 1).leading_zeros(); + let thread_mask_width = u16::BITS - (t.0 - 1).leading_zeros(); + let core_mask_width = u16::BITS - (t.1 - 1).leading_zeros(); + let die_mask_width = u16::BITS - (t.2 - 1).leading_zeros(); let thread_id = cpu_id % (t.0 as u32); let core_id = cpu_id / (t.0 as u32) % (t.1 as u32); @@ -229,6 +229,13 @@ pub fn get_x2apic_id(cpu_id: u32, topology: Option<(u8, u8, u8)>) -> u32 { cpu_id } +pub fn get_max_x2apic_id(topology: (u16, u16, u16, u16)) -> u32 { + get_x2apic_id( + (topology.0 as u32 * topology.1 as u32 * topology.2 as u32 * topology.3 as u32) - 1, + Some(topology), + ) +} + #[derive(Copy, Clone, Debug)] pub enum CpuidReg { EAX, @@ -825,7 +832,7 @@ pub fn configure_vcpu( cpuid: Vec, kvm_hyperv: bool, cpu_vendor: CpuVendor, - topology: Option<(u8, u8, u8)>, + topology: Option<(u16, u16, u16, u16)>, ) -> super::Result<()> { let x2apic_id = get_x2apic_id(id, topology); @@ -850,7 +857,7 @@ pub fn configure_vcpu( assert!(apic_id_patched); if let Some(t) = topology { - update_cpuid_topology(&mut cpuid, t.0, t.1, t.2, cpu_vendor, id); + update_cpuid_topology(&mut cpuid, t.0, t.1, t.2, t.3, cpu_vendor, id); } // The TSC frequency CPUID leaf should not be included when running with HyperV emulation @@ -953,7 +960,7 @@ pub fn configure_system( serial_number: Option<&str>, uuid: Option<&str>, oem_strings: Option<&[&str]>, - topology: Option<(u8, u8, u8)>, + topology: Option<(u16, u16, u16, u16)>, ) -> super::Result<()> { // Write EBDA address to location where ACPICA expects to find it guest_mem @@ -1361,21 +1368,24 @@ pub fn get_host_cpu_phys_bits(hypervisor: &Arc) -> u fn update_cpuid_topology( cpuid: &mut Vec, - threads_per_core: u8, - cores_per_die: u8, - dies_per_package: u8, + threads_per_core: u16, + cores_per_die: u16, + dies_per_package: u16, + packages: u16, cpu_vendor: CpuVendor, id: u32, ) { let x2apic_id = get_x2apic_id( id, - Some((threads_per_core, cores_per_die, dies_per_package)), + Some((threads_per_core, cores_per_die, dies_per_package, packages)), ); - let thread_width = 8 - (threads_per_core - 1).leading_zeros(); - let core_width = (8 - (cores_per_die - 1).leading_zeros()) + thread_width; - let die_width = (8 - (dies_per_package - 1).leading_zeros()) + core_width; + // Note: the topology defined here is per "package" (~NUMA node). 
+ let thread_width = u16::BITS - (threads_per_core - 1).leading_zeros(); + let core_width = u16::BITS - (cores_per_die - 1).leading_zeros() + thread_width; + let die_width = u16::BITS - (dies_per_package - 1).leading_zeros() + core_width; + // The very old way: a flat number of logical CPUs per package: CPUID.1H:EBX[23:16] bits. let mut cpu_ebx = CpuidPatch::get_cpuid_reg(cpuid, 0x1, None, CpuidReg::EBX).unwrap_or(0); cpu_ebx |= ((dies_per_package as u32) * (cores_per_die as u32) * (threads_per_core as u32)) & (0xff << 16); @@ -1385,6 +1395,7 @@ fn update_cpuid_topology( cpu_edx |= 1 << 28; CpuidPatch::set_cpuid_reg(cpuid, 0x1, None, CpuidReg::EDX, cpu_edx); + // The legacy way: threads+cores per package. // CPU Topology leaf 0xb CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::EAX, thread_width); CpuidPatch::set_cpuid_reg( @@ -1407,6 +1418,7 @@ fn update_cpuid_topology( CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::ECX, 2 << 8); CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::EDX, x2apic_id); + // The modern way: many-level hierarchy (but we here only support four levels). // CPU Topology leaf 0x1f CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::EAX, thread_width); CpuidPatch::set_cpuid_reg( @@ -1721,22 +1733,27 @@ mod tests { #[test] fn test_get_x2apic_id() { - let x2apic_id = get_x2apic_id(0, Some((2, 3, 1))); + let x2apic_id = get_x2apic_id(0, Some((2, 3, 1, 1))); assert_eq!(x2apic_id, 0); - let x2apic_id = get_x2apic_id(1, Some((2, 3, 1))); + let x2apic_id = get_x2apic_id(1, Some((2, 3, 1, 1))); assert_eq!(x2apic_id, 1); - let x2apic_id = get_x2apic_id(2, Some((2, 3, 1))); + let x2apic_id = get_x2apic_id(2, Some((2, 3, 1, 1))); assert_eq!(x2apic_id, 2); - let x2apic_id = get_x2apic_id(6, Some((2, 3, 1))); + let x2apic_id = get_x2apic_id(6, Some((2, 3, 1, 1))); assert_eq!(x2apic_id, 8); - let x2apic_id = get_x2apic_id(7, Some((2, 3, 1))); + let x2apic_id = get_x2apic_id(7, Some((2, 3, 1, 1))); assert_eq!(x2apic_id, 9); - let x2apic_id = get_x2apic_id(8, Some((2, 3, 1))); + let x2apic_id = get_x2apic_id(8, Some((2, 3, 1, 1))); assert_eq!(x2apic_id, 10); + + let x2apic_id = get_x2apic_id(257, Some((1, 312, 1, 1))); + assert_eq!(x2apic_id, 257); + + assert_eq!(255, get_max_x2apic_id((1, 256, 1, 1))); } } diff --git a/arch/src/x86_64/mptable.rs b/arch/src/x86_64/mptable.rs index 42667df9c2..d709a0043c 100644 --- a/arch/src/x86_64/mptable.rs +++ b/arch/src/x86_64/mptable.rs @@ -136,7 +136,7 @@ pub fn setup_mptable( offset: GuestAddress, mem: &GuestMemoryMmap, num_cpus: u32, - topology: Option<(u8, u8, u8)>, + topology: Option<(u16, u16, u16, u16)>, ) -> Result<()> { if num_cpus > 0 { let cpu_id_max = num_cpus - 1; diff --git a/vmm/src/acpi.rs b/vmm/src/acpi.rs index b9e809e4a1..a2299acd84 100644 --- a/vmm/src/acpi.rs +++ b/vmm/src/acpi.rs @@ -285,7 +285,7 @@ fn create_tpm2_table() -> Sdt { fn create_srat_table( numa_nodes: &NumaNodes, - #[cfg(target_arch = "x86_64")] topology: Option<(u8, u8, u8)>, + #[cfg(target_arch = "x86_64")] topology: Option<(u16, u16, u16, u16)>, ) -> Sdt { let mut srat = Sdt::new(*b"SRAT", 36, 3, *b"CLOUDH", *b"CHSRAT ", 1); // SRAT reserved 12 bytes diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index e5813562b1..7b16b39886 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -390,7 +390,7 @@ impl Vcpu { boot_setup: Option<(EntryPoint, &GuestMemoryAtomic)>, #[cfg(target_arch = "x86_64")] cpuid: Vec, #[cfg(target_arch = "x86_64")] kvm_hyperv: bool, - #[cfg(target_arch = "x86_64")] topology: Option<(u8, u8, u8)>, + #[cfg(target_arch 
= "x86_64")] topology: Option<(u16, u16, u16, u16)>, ) -> Result<()> { #[cfg(target_arch = "aarch64")] { @@ -884,8 +884,22 @@ impl CpuManager { #[cfg(target_arch = "x86_64")] let topology = self.config.topology.clone().map_or_else( - || Some((1, self.boot_vcpus().try_into().unwrap(), 1)), - |t| Some((t.threads_per_core, t.cores_per_die, t.dies_per_package)), + || { + Some(( + 1_u16, + u16::try_from(self.boot_vcpus()).unwrap(), + 1_u16, + 1_u16, + )) + }, + |t| { + Some(( + t.threads_per_core.into(), + t.cores_per_die.into(), + t.dies_per_package.into(), + t.packages.into(), + )) + }, ); #[cfg(target_arch = "x86_64")] vcpu.configure( @@ -1427,11 +1441,15 @@ impl CpuManager { .collect() } - pub fn get_vcpu_topology(&self) -> Option<(u8, u8, u8)> { - self.config - .topology - .clone() - .map(|t| (t.threads_per_core, t.cores_per_die, t.packages)) + pub fn get_vcpu_topology(&self) -> Option<(u16, u16, u16, u16)> { + self.config.topology.clone().map(|t| { + ( + t.threads_per_core.into(), + t.cores_per_die.into(), + t.dies_per_package.into(), + t.packages.into(), + ) + }) } #[cfg(not(target_arch = "riscv64"))] @@ -1574,9 +1592,10 @@ impl CpuManager { // If topology is not specified, the default setting is: // 1 package, multiple cores, 1 thread per core // This is also the behavior when PPTT is missing. - let (threads_per_core, cores_per_package, packages) = - self.get_vcpu_topology() - .unwrap_or((1, self.max_vcpus().try_into().unwrap(), 1)); + let (threads_per_core, cores_per_die, dies_per_package, packages) = self + .get_vcpu_topology() + .unwrap_or((1, u16::try_from(self.max_vcpus()).unwrap(), 1, 1)); + let cores_per_package = cores_per_die * dies_per_package; let mut pptt = Sdt::new(*b"PPTT", 36, 2, *b"CLOUDH", *b"CHPPTT ", 1); @@ -1931,7 +1950,7 @@ struct Cpu { proximity_domain: u32, dynamic: bool, #[cfg(target_arch = "x86_64")] - topology: Option<(u8, u8, u8)>, + topology: Option<(u16, u16, u16, u16)>, } #[cfg(target_arch = "x86_64")] diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 40579b9ccb..7e8851194d 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -3582,7 +3582,7 @@ mod tests { &mem, "console=tty0", vec![0], - Some((0, 0, 0)), + Some((0, 0, 0, 0)), &dev_info, &gic, &None, From 68a1bf38e52a39dc47ad250d8ee1f080a51258e2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Aug 2025 01:12:38 +0000 Subject: [PATCH 092/294] build: Bump gdbstub_arch from 0.3.0 to 0.3.2 Bumps [gdbstub_arch](https://github.com/daniel5151/gdbstub) from 0.3.0 to 0.3.2. - [Release notes](https://github.com/daniel5151/gdbstub/releases) - [Changelog](https://github.com/daniel5151/gdbstub/blob/master/CHANGELOG.md) - [Commits](https://github.com/daniel5151/gdbstub/commits) --- updated-dependencies: - dependency-name: gdbstub_arch dependency-version: 0.3.2 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 10 +++++----- vmm/Cargo.toml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cce1452830..a0d22f949d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -657,7 +657,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -839,9 +839,9 @@ dependencies = [ [[package]] name = "gdbstub_arch" -version = "0.3.0" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e3b1357bd3203fc09a6601327ae0ab38865d14231d0b65d3143f5762cc7977d" +checksum = "22dde0e1b68787036ccedd0b1ff6f953527a0e807e571fbe898975203027278f" dependencies = [ "gdbstub", "num-traits", @@ -1838,7 +1838,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1851,7 +1851,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.9.4", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 5567a58579..6b33e6344d 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -48,7 +48,7 @@ event_monitor = { path = "../event_monitor" } flume = { workspace = true } futures = { version = "0.3.31", optional = true } gdbstub = { version = "0.7.6", optional = true } -gdbstub_arch = { version = "0.3.0", optional = true } +gdbstub_arch = { version = "0.3.2", optional = true } hex = { version = "0.4.3", optional = true } hypervisor = { path = "../hypervisor" } igvm = { workspace = true, optional = true } From 3d5e5f318ba8a8b2bebe6d95369bdff2385d065f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Aug 2025 09:48:26 +0000 Subject: [PATCH 093/294] build: Bump actions/checkout from 4 to 5 Bumps [actions/checkout](https://github.com/actions/checkout) from 4 to 5. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/audit.yaml | 2 +- .github/workflows/build.yaml | 2 +- .github/workflows/dco.yaml | 2 +- .github/workflows/docker-image.yaml | 2 +- .github/workflows/formatting.yaml | 2 +- .github/workflows/fuzz-build.yaml | 2 +- .github/workflows/gitlint.yaml | 2 +- .github/workflows/hadolint.yaml | 2 +- .github/workflows/integration-arm64.yaml | 2 +- .github/workflows/integration-metrics.yaml | 2 +- .github/workflows/integration-rate-limiter.yaml | 2 +- .github/workflows/integration-vfio.yaml | 2 +- .github/workflows/integration-windows.yaml | 2 +- .github/workflows/integration-x86-64.yaml | 2 +- .github/workflows/lychee.yaml | 2 +- .github/workflows/openapi.yaml | 2 +- .github/workflows/package-consistency.yaml | 2 +- .github/workflows/preview-riscv64.yaml | 2 +- .github/workflows/quality.yaml | 4 ++-- .github/workflows/release.yaml | 2 +- .github/workflows/reuse.yaml | 2 +- .github/workflows/shlint.yaml | 2 +- .github/workflows/taplo.yaml | 2 +- 23 files changed, 24 insertions(+), 24 deletions(-) diff --git a/.github/workflows/audit.yaml b/.github/workflows/audit.yaml index 2e44b9af40..cfc21696e1 100644 --- a/.github/workflows/audit.yaml +++ b/.github/workflows/audit.yaml @@ -10,7 +10,7 @@ jobs: name: Audit runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: actions-rust-lang/audit@v1 with: token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 5c32406646..c78750f78d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -21,7 +21,7 @@ jobs: - x86_64-unknown-linux-musl steps: - name: Code checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 diff --git a/.github/workflows/dco.yaml b/.github/workflows/dco.yaml index 888b685820..4c83547872 100644 --- a/.github/workflows/dco.yaml +++ b/.github/workflows/dco.yaml @@ -6,7 +6,7 @@ jobs: name: DCO Check ("Signed-Off-By") runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python 3.x uses: actions/setup-python@v5 with: diff --git a/.github/workflows/docker-image.yaml b/.github/workflows/docker-image.yaml index b5bbdf4284..3e5f56458c 100644 --- a/.github/workflows/docker-image.yaml +++ b/.github/workflows/docker-image.yaml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Code checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Set up QEMU uses: docker/setup-qemu-action@v3 diff --git a/.github/workflows/formatting.yaml b/.github/workflows/formatting.yaml index b6dd6cafc1..75e4492559 100644 --- a/.github/workflows/formatting.yaml +++ b/.github/workflows/formatting.yaml @@ -19,7 +19,7 @@ jobs: RUSTFLAGS: -D warnings steps: - name: Code checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Install Rust toolchain (${{ matrix.rust }}) uses: dtolnay/rust-toolchain@stable with: diff --git a/.github/workflows/fuzz-build.yaml b/.github/workflows/fuzz-build.yaml index db868de2be..427189b01e 100644 --- a/.github/workflows/fuzz-build.yaml +++ b/.github/workflows/fuzz-build.yaml @@ -18,7 +18,7 @@ jobs: RUSTFLAGS: -D warnings steps: - name: Code checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Install Rust toolchain (${{ matrix.rust }}) uses: dtolnay/rust-toolchain@stable with: diff --git a/.github/workflows/gitlint.yaml b/.github/workflows/gitlint.yaml index 11ebf707a4..c31fee202d 100644 --- a/.github/workflows/gitlint.yaml +++ 
b/.github/workflows/gitlint.yaml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 diff --git a/.github/workflows/hadolint.yaml b/.github/workflows/hadolint.yaml index 31b8910984..641d911c0c 100644 --- a/.github/workflows/hadolint.yaml +++ b/.github/workflows/hadolint.yaml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Lint Dockerfile uses: hadolint/hadolint-action@master diff --git a/.github/workflows/integration-arm64.yaml b/.github/workflows/integration-arm64.yaml index d580a991cc..41a7bc824a 100644 --- a/.github/workflows/integration-arm64.yaml +++ b/.github/workflows/integration-arm64.yaml @@ -13,7 +13,7 @@ jobs: - name: Fix workspace permissions run: sudo chown -R runner:runner ${GITHUB_WORKSPACE} - name: Code checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 - name: Run unit tests (musl) diff --git a/.github/workflows/integration-metrics.yaml b/.github/workflows/integration-metrics.yaml index 440e9ad850..e8dd72ea84 100644 --- a/.github/workflows/integration-metrics.yaml +++ b/.github/workflows/integration-metrics.yaml @@ -12,7 +12,7 @@ jobs: METRICS_PUBLISH_KEY: ${{ secrets.METRICS_PUBLISH_KEY }} steps: - name: Code checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 - name: Run metrics tests diff --git a/.github/workflows/integration-rate-limiter.yaml b/.github/workflows/integration-rate-limiter.yaml index 5700bfe46f..91682f77f8 100644 --- a/.github/workflows/integration-rate-limiter.yaml +++ b/.github/workflows/integration-rate-limiter.yaml @@ -13,7 +13,7 @@ jobs: steps: - name: Code checkout if: ${{ github.event_name != 'pull_request' }} - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 - name: Run rate-limiter integration tests diff --git a/.github/workflows/integration-vfio.yaml b/.github/workflows/integration-vfio.yaml index 3549ace272..edd7399b15 100644 --- a/.github/workflows/integration-vfio.yaml +++ b/.github/workflows/integration-vfio.yaml @@ -16,7 +16,7 @@ jobs: run: sudo chown -R runner:runner ${GITHUB_WORKSPACE} - name: Code checkout if: ${{ github.event_name != 'pull_request' }} - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 - name: Run VFIO integration tests diff --git a/.github/workflows/integration-windows.yaml b/.github/workflows/integration-windows.yaml index 29aa04a78f..0769789a9d 100644 --- a/.github/workflows/integration-windows.yaml +++ b/.github/workflows/integration-windows.yaml @@ -11,7 +11,7 @@ jobs: steps: - name: Code checkout if: ${{ github.event_name != 'pull_request' }} - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 - name: Install Docker diff --git a/.github/workflows/integration-x86-64.yaml b/.github/workflows/integration-x86-64.yaml index 80690512f5..8ed76f16a1 100644 --- a/.github/workflows/integration-x86-64.yaml +++ b/.github/workflows/integration-x86-64.yaml @@ -17,7 +17,7 @@ jobs: steps: - name: Code checkout if: ${{ github.event_name != 'pull_request' || (matrix.runner == 'garm-jammy' && matrix.libc == 'gnu') }} - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 - name: Install Docker diff --git a/.github/workflows/lychee.yaml b/.github/workflows/lychee.yaml index 68271c509b..191d53e6a3 100644 --- 
a/.github/workflows/lychee.yaml +++ b/.github/workflows/lychee.yaml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Code checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: # Fetch the entire history so git diff can compare against the base branch fetch-depth: 0 diff --git a/.github/workflows/openapi.yaml b/.github/workflows/openapi.yaml index 0cd5b848cc..9c1266e4d7 100644 --- a/.github/workflows/openapi.yaml +++ b/.github/workflows/openapi.yaml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest container: openapitools/openapi-generator-cli steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Validate OpenAPI env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/package-consistency.yaml b/.github/workflows/package-consistency.yaml index 0c57baa6c0..719aa3d8df 100644 --- a/.github/workflows/package-consistency.yaml +++ b/.github/workflows/package-consistency.yaml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Code checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 diff --git a/.github/workflows/preview-riscv64.yaml b/.github/workflows/preview-riscv64.yaml index 6e4c5071e3..02f796b590 100644 --- a/.github/workflows/preview-riscv64.yaml +++ b/.github/workflows/preview-riscv64.yaml @@ -19,7 +19,7 @@ jobs: steps: - name: Code checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 576acaffa9..feecbe60cb 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -29,7 +29,7 @@ jobs: steps: - name: Code checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 @@ -155,6 +155,6 @@ jobs: name: Typos / Spellcheck runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 # Executes "typos ." 
- uses: crate-ci/typos@v1.35.3 diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index ef1eb3573c..44842475d1 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -29,7 +29,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Code checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Install musl-gcc if: contains(matrix.platform.target, 'musl') run: sudo apt install -y musl-tools diff --git a/.github/workflows/reuse.yaml b/.github/workflows/reuse.yaml index a2161c2818..3a463eedcd 100644 --- a/.github/workflows/reuse.yaml +++ b/.github/workflows/reuse.yaml @@ -7,6 +7,6 @@ jobs: name: REUSE Compliance Check runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: REUSE Compliance Check uses: fsfe/reuse-action@v5 diff --git a/.github/workflows/shlint.yaml b/.github/workflows/shlint.yaml index 9089964f06..b9208f3f20 100644 --- a/.github/workflows/shlint.yaml +++ b/.github/workflows/shlint.yaml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Run the shell script checkers uses: luizm/action-sh-checker@master env: diff --git a/.github/workflows/taplo.yaml b/.github/workflows/taplo.yaml index 2b1e618984..75b61d9236 100644 --- a/.github/workflows/taplo.yaml +++ b/.github/workflows/taplo.yaml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Code checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable - name: Install build dependencies From 10b79431f6c332d0f53f61abeb4207ec95c3f52f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Aug 2025 23:04:01 +0000 Subject: [PATCH 094/294] build: Bump linux-loader from `d5f39c0` to `5fdaed8` Bumps [linux-loader](https://github.com/rust-vmm/linux-loader) from `d5f39c0` to `5fdaed8`. - [Release notes](https://github.com/rust-vmm/linux-loader/releases) - [Commits](https://github.com/rust-vmm/linux-loader/compare/d5f39c09d59c8f50d5313b78ce4de511b12d1848...5fdaed87ddafc89d6abf0b50195a12d19133000d) --- updated-dependencies: - dependency-name: linux-loader dependency-version: 5fdaed87ddafc89d6abf0b50195a12d19133000d dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index a0d22f949d..969857013a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1145,7 +1145,7 @@ dependencies = [ [[package]] name = "linux-loader" version = "0.13.0" -source = "git+https://github.com/rust-vmm/linux-loader?branch=main#d5f39c09d59c8f50d5313b78ce4de511b12d1848" +source = "git+https://github.com/rust-vmm/linux-loader?branch=main#5fdaed87ddafc89d6abf0b50195a12d19133000d" dependencies = [ "vm-memory", ] From 34385e99f20058e5a537ea36d795844c12fc2258 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 13 Aug 2025 19:26:24 +0000 Subject: [PATCH 095/294] misc: simplify configure_vcpu() signature on x86_64 It is always called with topology provided, so there is no need to pass topology as an Option. Simplifying the signature makes further topology-related changes to arc/src/x86_64 module simpler. 
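A minimal sketch of what call sites look like once the Option is dropped (illustrative only; the names below are stand-ins for the actual CpuManager fields and methods shown in the diff that follows):

```rust
/// Illustrative helper, not part of the patch: callers now always hand
/// configure_vcpu() a concrete (threads, cores, dies, packages) tuple.
fn effective_topology(
    user_topology: Option<(u16, u16, u16, u16)>,
    boot_vcpus: u8,
) -> (u16, u16, u16, u16) {
    // Default is a flat layout: one thread per core, one die, one package,
    // with as many cores as there are boot vCPUs.
    user_topology.unwrap_or((1, u16::from(boot_vcpus), 1, 1))
}
```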
Signed-off-by: Peter Oskolkov --- arch/src/x86_64/mod.rs | 10 +++++----- vmm/src/cpu.rs | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index 6dcac04f1b..e4c626835d 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -832,9 +832,9 @@ pub fn configure_vcpu( cpuid: Vec, kvm_hyperv: bool, cpu_vendor: CpuVendor, - topology: Option<(u16, u16, u16, u16)>, + topology: (u16, u16, u16, u16), ) -> super::Result<()> { - let x2apic_id = get_x2apic_id(id, topology); + let x2apic_id = get_x2apic_id(id, Some(topology)); // Per vCPU CPUID changes; common are handled via generate_common_cpuid() let mut cpuid = cpuid; @@ -856,9 +856,9 @@ pub fn configure_vcpu( } assert!(apic_id_patched); - if let Some(t) = topology { - update_cpuid_topology(&mut cpuid, t.0, t.1, t.2, t.3, cpu_vendor, id); - } + update_cpuid_topology( + &mut cpuid, topology.0, topology.1, topology.2, topology.3, cpu_vendor, id, + ); // The TSC frequency CPUID leaf should not be included when running with HyperV emulation if !kvm_hyperv { diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index 7b16b39886..629b0152b6 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -390,7 +390,7 @@ impl Vcpu { boot_setup: Option<(EntryPoint, &GuestMemoryAtomic)>, #[cfg(target_arch = "x86_64")] cpuid: Vec, #[cfg(target_arch = "x86_64")] kvm_hyperv: bool, - #[cfg(target_arch = "x86_64")] topology: Option<(u16, u16, u16, u16)>, + #[cfg(target_arch = "x86_64")] topology: (u16, u16, u16, u16), ) -> Result<()> { #[cfg(target_arch = "aarch64")] { @@ -885,20 +885,20 @@ impl CpuManager { #[cfg(target_arch = "x86_64")] let topology = self.config.topology.clone().map_or_else( || { - Some(( + ( 1_u16, u16::try_from(self.boot_vcpus()).unwrap(), 1_u16, 1_u16, - )) + ) }, |t| { - Some(( + ( t.threads_per_core.into(), t.cores_per_die.into(), t.dies_per_package.into(), t.packages.into(), - )) + ) }, ); #[cfg(target_arch = "x86_64")] From 84613d4273481f8a11845d73fbcdc1809ca54c33 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 13 Aug 2025 19:36:56 +0000 Subject: [PATCH 096/294] arch: enable x2apic mode on x86_64 if max apic id > 254 This patch builds on PRs https://github.com/cloud-hypervisor/cloud-hypervisor/pull/7231 https://github.com/cloud-hypervisor/cloud-hypervisor/pull/7261 As before, the changes won't be effective until config parsing/validation raises the vCPU limit. Signed-off-by: Barret Rhoden Signed-off-by: Neel Natu Signed-off-by: Ofir Weisse Signed-off-by: Peter Oskolkov --- arch/src/x86_64/mod.rs | 16 +++++++++++++++- arch/src/x86_64/regs.rs | 16 +++++++++++++--- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index e4c626835d..a4c0b88bf1 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -57,6 +57,8 @@ const KVM_FEATURE_ASYNC_PF_VMEXIT_BIT: u8 = 10; #[cfg(feature = "tdx")] const KVM_FEATURE_STEAL_TIME_BIT: u8 = 5; +const KVM_FEATURE_MSI_EXT_DEST_ID: u8 = 15; + pub const _NSIG: i32 = 65; #[derive(Debug, Copy, Clone)] @@ -745,6 +747,10 @@ pub fn generate_common_cpuid( entry.eax = (entry.eax & 0xffff_ff00) | (config.phys_bits as u32 & 0xff); } 0x4000_0001 => { + // Enable KVM_FEATURE_MSI_EXT_DEST_ID. This allows the guest to target + // device interrupts to cpus with APIC IDs > 254 without interrupt remapping. 
+ entry.eax |= 1 << KVM_FEATURE_MSI_EXT_DEST_ID; + // These features are not supported by TDX #[cfg(feature = "tdx")] if config.tdx { @@ -903,7 +909,15 @@ pub fn configure_vcpu( if let Some((kernel_entry_point, guest_memory)) = boot_setup { regs::setup_regs(vcpu, kernel_entry_point).map_err(Error::RegsConfiguration)?; regs::setup_fpu(vcpu).map_err(Error::FpuConfiguration)?; - regs::setup_sregs(&guest_memory.memory(), vcpu).map_err(Error::SregsConfiguration)?; + + // CPUs are required (by Intel sdm spec) to boot in x2apic mode if any + // of the apic IDs is larger than 255. Experimentally, the Linux kernel + // does not recognize the last vCPU if x2apic is not enabled when + // there are 256 vCPUs in a flat hierarchy (i.e. max x2apic ID is 255), + // so we need to enable x2apic in this case as well. + let enable_x2_apic_mode = get_max_x2apic_id(topology) >= 255; + regs::setup_sregs(&guest_memory.memory(), vcpu, enable_x2_apic_mode) + .map_err(Error::SregsConfiguration)?; } interrupts::set_lint(vcpu).map_err(|e| Error::LocalIntConfiguration(e.into()))?; Ok(()) diff --git a/arch/src/x86_64/regs.rs b/arch/src/x86_64/regs.rs index 3826fdb6ce..706dcd0622 100644 --- a/arch/src/x86_64/regs.rs +++ b/arch/src/x86_64/regs.rs @@ -119,9 +119,13 @@ pub fn setup_regs(vcpu: &Arc, entry_point: EntryPoint) -> /// /// * `mem` - The memory that will be passed to the guest. /// * `vcpu` - Structure for the VCPU that holds the VCPU's fd. -pub fn setup_sregs(mem: &GuestMemoryMmap, vcpu: &Arc) -> Result<()> { +pub fn setup_sregs( + mem: &GuestMemoryMmap, + vcpu: &Arc, + enable_x2_apic_mode: bool, +) -> Result<()> { let mut sregs: SpecialRegisters = vcpu.get_sregs().map_err(Error::GetStatusRegisters)?; - configure_segments_and_sregs(mem, &mut sregs)?; + configure_segments_and_sregs(mem, &mut sregs, enable_x2_apic_mode)?; vcpu.set_sregs(&sregs).map_err(Error::SetStatusRegisters) } @@ -148,6 +152,7 @@ fn write_idt_value(val: u64, guest_mem: &GuestMemoryMmap) -> Result<()> { pub fn configure_segments_and_sregs( mem: &GuestMemoryMmap, sregs: &mut SpecialRegisters, + enable_x2_apic_mode: bool, ) -> Result<()> { let gdt_table: [u64; BOOT_GDT_MAX] = { // Configure GDT entries as specified by PVH boot protocol @@ -183,6 +188,11 @@ pub fn configure_segments_and_sregs( sregs.cr0 = CR0_PE; sregs.cr4 = 0; + if enable_x2_apic_mode { + const X2APIC_ENABLE_BIT: u64 = 1 << 10; + sregs.apic_base |= X2APIC_ENABLE_BIT; + } + Ok(()) } @@ -204,7 +214,7 @@ mod tests { fn segments_and_sregs() { let mut sregs: SpecialRegisters = Default::default(); let gm = create_guest_mem(); - configure_segments_and_sregs(&gm, &mut sregs).unwrap(); + configure_segments_and_sregs(&gm, &mut sregs, false).unwrap(); assert_eq!(0x0, read_u64(&gm, BOOT_GDT_START)); assert_eq!( 0xcf9b000000ffff, From 55212b043702657671636238ad9cb228cbe57dc0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 14 Aug 2025 00:04:17 +0000 Subject: [PATCH 097/294] build: Bump crate-ci/typos from 1.35.3 to 1.35.4 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.35.3 to 1.35.4. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.35.3...v1.35.4) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-version: 1.35.4 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/quality.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index feecbe60cb..fb733b1a09 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -157,4 +157,4 @@ jobs: steps: - uses: actions/checkout@v5 # Executes "typos ." - - uses: crate-ci/typos@v1.35.3 + - uses: crate-ci/typos@v1.35.4 From bd17c84d3c4f2b3582851bf995cd9ac26c78d92f Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Tue, 17 Jun 2025 16:56:40 +0800 Subject: [PATCH 098/294] virtio-devices: move userspace mapping to vm-device Move UserspaceMapping to vm-device to avoid redefinition since UserspaceMapping is used by both `virtio-devices` and `device` crate. Signed-off-by: Songqian Li --- Cargo.lock | 1 + fuzz/fuzz_targets/pmem.rs | 3 ++- virtio-devices/src/device.rs | 10 +--------- virtio-devices/src/lib.rs | 4 ++-- virtio-devices/src/pmem.rs | 5 +++-- virtio-devices/src/vhost_user/fs.rs | 6 +++--- vm-device/Cargo.toml | 1 + vm-device/src/lib.rs | 10 ++++++++++ vmm/src/device_manager.rs | 4 ++-- 9 files changed, 25 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 969857013a..dede7fb8d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2403,6 +2403,7 @@ dependencies = [ "serde", "thiserror 2.0.12", "vfio-ioctls", + "vm-memory", "vmm-sys-util", ] diff --git a/fuzz/fuzz_targets/pmem.rs b/fuzz/fuzz_targets/pmem.rs index e8cb488e77..e9247fb631 100644 --- a/fuzz/fuzz_targets/pmem.rs +++ b/fuzz/fuzz_targets/pmem.rs @@ -12,8 +12,9 @@ use std::{ffi, io}; use libc::{MAP_NORESERVE, MAP_PRIVATE, PROT_READ, PROT_WRITE}; use libfuzzer_sys::{fuzz_target, Corpus}; use seccompiler::SeccompAction; -use virtio_devices::{Pmem, UserspaceMapping, VirtioDevice, VirtioInterrupt, VirtioInterruptType}; +use virtio_devices::{Pmem, VirtioDevice, VirtioInterrupt, VirtioInterruptType}; use virtio_queue::{Queue, QueueT}; +use vm_device::UserspaceMapping; use vm_memory::bitmap::AtomicBitmap; use vm_memory::guest_memory::FileOffset; use vm_memory::{Bytes, GuestAddress, GuestMemoryAtomic, MmapRegion}; diff --git a/virtio-devices/src/device.rs b/virtio-devices/src/device.rs index 06b245e271..c0d24902a1 100644 --- a/virtio-devices/src/device.rs +++ b/virtio-devices/src/device.rs @@ -15,6 +15,7 @@ use std::thread; use libc::EFD_NONBLOCK; use virtio_queue::Queue; +use vm_device::UserspaceMapping; use vm_memory::{GuestAddress, GuestMemoryAtomic, GuestUsize}; use vm_migration::{MigratableError, Pausable}; use vm_virtio::{AccessPlatform, VirtioDeviceType}; @@ -37,15 +38,6 @@ pub trait VirtioInterrupt: Send + Sync { } } -#[derive(Clone)] -pub struct UserspaceMapping { - pub host_addr: u64, - pub mem_slot: u32, - pub addr: GuestAddress, - pub len: GuestUsize, - pub mergeable: bool, -} - #[derive(Clone)] pub struct VirtioSharedMemory { pub offset: u64, diff --git a/virtio-devices/src/lib.rs b/virtio-devices/src/lib.rs index 6a21eb2734..a59661eb61 100644 --- a/virtio-devices/src/lib.rs +++ b/virtio-devices/src/lib.rs @@ -47,8 +47,8 @@ pub use self::balloon::Balloon; pub use self::block::{Block, BlockState}; pub use self::console::{Console, ConsoleResizer, Endpoint}; pub use self::device::{ - DmaRemapping, UserspaceMapping, VirtioCommon, VirtioDevice, VirtioInterrupt, - VirtioInterruptType, VirtioSharedMemoryList, + DmaRemapping, VirtioCommon, VirtioDevice, VirtioInterrupt, VirtioInterruptType, + VirtioSharedMemoryList, }; pub use self::epoll_helper::{ EpollHelper, EpollHelperError, 
EpollHelperHandler, EPOLL_HELPER_EVENT_LAST, diff --git a/virtio-devices/src/pmem.rs b/virtio-devices/src/pmem.rs index 5f7ee9457b..1e4353477c 100644 --- a/virtio-devices/src/pmem.rs +++ b/virtio-devices/src/pmem.rs @@ -18,6 +18,7 @@ use seccompiler::SeccompAction; use serde::{Deserialize, Serialize}; use thiserror::Error; use virtio_queue::{DescriptorChain, Queue, QueueT}; +use vm_device::UserspaceMapping; use vm_memory::{ Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, GuestMemoryError, GuestMemoryLoadGuard, @@ -28,8 +29,8 @@ use vmm_sys_util::eventfd::EventFd; use super::{ ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, - Error as DeviceError, UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType, - EPOLL_HELPER_EVENT_LAST, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1, + Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, + VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1, }; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; diff --git a/virtio-devices/src/vhost_user/fs.rs b/virtio-devices/src/vhost_user/fs.rs index 1a24f1c2dd..4aa6920475 100644 --- a/virtio-devices/src/vhost_user/fs.rs +++ b/virtio-devices/src/vhost_user/fs.rs @@ -11,6 +11,7 @@ use serde_with::{serde_as, Bytes}; use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}; use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler}; use virtio_queue::Queue; +use vm_device::UserspaceMapping; use vm_memory::{ByteValued, GuestMemoryAtomic}; use vm_migration::protocol::MemoryRangeTable; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; @@ -22,9 +23,8 @@ use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; use crate::vhost_user::VhostUserCommon; use crate::{ - ActivateResult, GuestMemoryMmap, GuestRegionMmap, MmapRegion, UserspaceMapping, VirtioCommon, - VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioSharedMemoryList, - VIRTIO_F_IOMMU_PLATFORM, + ActivateResult, GuestMemoryMmap, GuestRegionMmap, MmapRegion, VirtioCommon, VirtioDevice, + VirtioDeviceType, VirtioInterrupt, VirtioSharedMemoryList, VIRTIO_F_IOMMU_PLATFORM, }; const NUM_QUEUE_OFFSET: usize = 1; diff --git a/vm-device/Cargo.toml b/vm-device/Cargo.toml index 9df6af3a0a..ca1d38807d 100644 --- a/vm-device/Cargo.toml +++ b/vm-device/Cargo.toml @@ -14,4 +14,5 @@ hypervisor = { path = "../hypervisor" } serde = { workspace = true, features = ["derive", "rc"] } thiserror = { workspace = true } vfio-ioctls = { workspace = true, default-features = false } +vm-memory = { workspace = true, features = ["backend-mmap"] } vmm-sys-util = { workspace = true } diff --git a/vm-device/src/lib.rs b/vm-device/src/lib.rs index c10731ea95..f484e9f14e 100644 --- a/vm-device/src/lib.rs +++ b/vm-device/src/lib.rs @@ -4,6 +4,7 @@ // use serde::{Deserialize, Serialize}; +use vm_memory::{GuestAddress, GuestUsize}; mod bus; pub mod dma_mapping; @@ -58,3 +59,12 @@ pub enum Resource { /// KVM memslot index. 
KvmMemSlot(u32), } + +#[derive(Clone)] +pub struct UserspaceMapping { + pub host_addr: u64, + pub mem_slot: u32, + pub addr: GuestAddress, + pub len: GuestUsize, + pub mergeable: bool, +} diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index f43c50b11f..ac1c0a406d 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -95,7 +95,7 @@ use vm_device::dma_mapping::ExternalDmaMapping; use vm_device::interrupt::{ InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, }; -use vm_device::{Bus, BusDevice, BusDeviceSync, Resource}; +use vm_device::{Bus, BusDevice, BusDeviceSync, Resource, UserspaceMapping}; use vm_memory::guest_memory::FileOffset; use vm_memory::{Address, GuestAddress, GuestMemoryRegion, GuestUsize, MmapRegion}; #[cfg(target_arch = "x86_64")] @@ -3231,7 +3231,7 @@ impl DeviceManager { .create_userspace_mapping(region_base, region_size, host_addr, false, false, false) .map_err(DeviceManagerError::MemoryManager)?; - let mapping = virtio_devices::UserspaceMapping { + let mapping = UserspaceMapping { host_addr, mem_slot, addr: GuestAddress(region_base), From c72414552bfbfc2a7cc4af150ce3a6a5ff3b6ab0 Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Thu, 29 Aug 2024 15:53:32 +0800 Subject: [PATCH 099/294] devices: introduce ivshmem device This patch introduces the inter-vm shared memory(ivshmem) device to share a memory region between multiple processes running different guests and the host. This patch supports the basic ivshmem functions like ivshmem-plain in QEMU[1]. [1] https://www.qemu.org/docs/master/specs/ivshmem-spec.html Signed-off-by: Yi Wang Signed-off-by: Songqian Li --- devices/Cargo.toml | 1 + devices/src/ivshmem.rs | 361 +++++++++++++++++++++++++++++++++++++++++ devices/src/lib.rs | 4 + 3 files changed, 366 insertions(+) create mode 100644 devices/src/ivshmem.rs diff --git a/devices/Cargo.toml b/devices/Cargo.toml index d2fcc4a94a..2b985085f4 100644 --- a/devices/Cargo.toml +++ b/devices/Cargo.toml @@ -45,5 +45,6 @@ arch = { path = "../arch" } [features] default = [] fw_cfg = ["arch/fw_cfg", "bitfield-struct", "linux-loader", "zerocopy"] +ivshmem = [] kvm = ["arch/kvm"] pvmemcontrol = [] diff --git a/devices/src/ivshmem.rs b/devices/src/ivshmem.rs new file mode 100644 index 0000000000..6e9ef4293e --- /dev/null +++ b/devices/src/ivshmem.rs @@ -0,0 +1,361 @@ +// Copyright © 2024 Tencent Corporation. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::any::Any; +use std::path::PathBuf; +use std::result; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::{Arc, Barrier, Mutex}; + +use anyhow::anyhow; +use byteorder::{ByteOrder, LittleEndian}; +use pci::{ + BarReprogrammingParams, PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, + PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciSubclass, + PCI_CONFIGURATION_ID, +}; +use serde::{Deserialize, Serialize}; +use thiserror::Error; +use vm_allocator::{AddressAllocator, SystemAllocator}; +use vm_device::{BusDevice, Resource, UserspaceMapping}; +use vm_memory::bitmap::AtomicBitmap; +use vm_memory::{Address, GuestAddress}; +use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; +use vmm_sys_util::eventfd::EventFd; + +const IVSHMEM_BAR0_IDX: usize = 0; +const IVSHMEM_BAR1_IDX: usize = 1; +const IVSHMEM_BAR2_IDX: usize = 2; + +const IVSHMEM_VENDOR_ID: u16 = 0x1af4; +const IVSHMEM_DEVICE_ID: u16 = 0x1110; + +const IVSHMEM_REG_BAR_SIZE: u64 = 0x100; + +type GuestRegionMmap = vm_memory::GuestRegionMmap; + +#[derive(Debug, Error)] +pub enum IvshmemError { + #[error("Failed to retrieve PciConfigurationState: {0}")] + RetrievePciConfigurationState(#[source] anyhow::Error), + #[error("Failed to retrieve IvshmemDeviceState: {0}")] + RetrieveIvshmemDeviceStateState(#[source] anyhow::Error), +} + +#[derive(Copy, Clone)] +pub enum IvshmemSubclass { + Other = 0x00, +} + +impl PciSubclass for IvshmemSubclass { + fn get_register_value(&self) -> u8 { + *self as u8 + } +} + +pub struct IvshmemDevice { + id: String, + + // ivshmem device registers + interrupt_mask: u32, + interrupt_status: Arc, + iv_position: u32, + doorbell: u32, + + // PCI configuration registers. 
+ configuration: PciConfiguration, + bar_regions: Vec, + + region: Option>, + region_size: u64, + userspace_mapping: Option, + reprogram_evt: EventFd, +} + +#[derive(Serialize, Deserialize, Default, Clone)] +pub struct IvshmemDeviceState { + interrupt_mask: u32, + interrupt_status: u32, + iv_position: u32, + doorbell: u32, +} + +impl IvshmemDevice { + pub fn new( + id: String, + region_size: u64, + snapshot: Option, + ) -> Result { + let pci_configuration_state = + vm_migration::state_from_id(snapshot.as_ref(), PCI_CONFIGURATION_ID).map_err(|e| { + IvshmemError::RetrievePciConfigurationState(anyhow!( + "Failed to get PciConfigurationState from Snapshot: {e}", + e + )) + })?; + + let state: Option = snapshot + .as_ref() + .map(|s| s.to_state()) + .transpose() + .map_err(|e| { + IvshmemError::RetrieveIvshmemDeviceStateState(anyhow!( + "Failed to get IvshmemDeviceState from Snapshot: {e}", + )) + })?; + + let configuration = PciConfiguration::new( + IVSHMEM_VENDOR_ID, + IVSHMEM_DEVICE_ID, + 0x1, + PciClassCode::MemoryController, + &IvshmemSubclass::Other, + None, + PciHeaderType::Device, + 0, + 0, + None, + pci_configuration_state, + ); + + let device = if let Some(s) = state { + IvshmemDevice { + id, + configuration, + bar_regions: vec![], + interrupt_mask: s.interrupt_mask, + interrupt_status: Arc::new(AtomicU32::new(s.interrupt_status)), + iv_position: s.iv_position, + doorbell: s.doorbell, + region_size, + region: None, + userspace_mapping: None, + } + } else { + IvshmemDevice { + id, + configuration, + bar_regions: vec![], + interrupt_mask: 0, + interrupt_status: Arc::new(AtomicU32::new(0)), + iv_position: 0, + doorbell: 0, + region_size, + region: None, + userspace_mapping: None, + } + }; + Ok(device) + } + + pub fn config_bar_addr(&self) -> u64 { + self.configuration.get_bar_addr(IVSHMEM_BAR0_IDX) + } + + pub fn data_bar_addr(&self) -> u64 { + self.configuration.get_bar_addr(IVSHMEM_BAR2_IDX) + } + + fn state(&self) -> IvshmemDeviceState { + IvshmemDeviceState { + interrupt_mask: self.interrupt_mask, + interrupt_status: self.interrupt_status.load(Ordering::SeqCst), + iv_position: self.iv_position, + doorbell: self.doorbell, + } + } +} + +impl BusDevice for IvshmemDevice { + fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { + self.read_bar(base, offset, data) + } + + fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option> { + self.write_bar(base, offset, data) + } +} + +impl PciDevice for IvshmemDevice { + fn allocate_bars( + &mut self, + _allocator: &Arc>, + mmio32_allocator: &mut AddressAllocator, + mmio64_allocator: &mut AddressAllocator, + resources: Option>, + ) -> std::result::Result, PciDeviceError> { + let mut bars = Vec::new(); + let mut bar0_addr = None; + let mut bar2_addr = None; + + let restoring = resources.is_some(); + if let Some(resources) = resources { + for resource in resources { + match resource { + Resource::PciBar { index, base, .. 
} => { + match index { + IVSHMEM_BAR0_IDX => { + bar0_addr = Some(GuestAddress(base)); + } + IVSHMEM_BAR1_IDX => {} + IVSHMEM_BAR2_IDX => { + bar2_addr = Some(GuestAddress(base)); + } + _ => { + error!("Unexpected pci bar index {index}"); + } + }; + } + _ => { + error!("Unexpected resource {resource:?}"); + } + } + } + if bar0_addr.is_none() || bar2_addr.is_none() { + return Err(PciDeviceError::MissingResource); + } + } + + // BAR0 holds device registers (256 Byte MMIO) + let bar0_addr = mmio32_allocator + .allocate(bar0_addr, IVSHMEM_REG_BAR_SIZE, None) + .ok_or(PciDeviceError::IoAllocationFailed(IVSHMEM_REG_BAR_SIZE))?; + debug!("ivshmem bar0 address 0x{:x}", bar0_addr.0); + + let bar0 = PciBarConfiguration::default() + .set_index(IVSHMEM_BAR0_IDX) + .set_address(bar0_addr.raw_value()) + .set_size(IVSHMEM_REG_BAR_SIZE) + .set_region_type(PciBarRegionType::Memory32BitRegion) + .set_prefetchable(PciBarPrefetchable::NotPrefetchable); + + // BAR1 holds MSI-X table and PBA (only ivshmem-doorbell). + + // BAR2 maps the shared memory object + let bar2_size = self.region_size; + let bar2_addr = mmio64_allocator + .allocate(bar2_addr, bar2_size, None) + .ok_or(PciDeviceError::IoAllocationFailed(bar2_size))?; + debug!("ivshmem bar2 address 0x{:x}", bar2_addr.0); + + let bar2 = PciBarConfiguration::default() + .set_index(IVSHMEM_BAR2_IDX) + .set_address(bar2_addr.raw_value()) + .set_size(bar2_size) + .set_region_type(PciBarRegionType::Memory64BitRegion) + .set_prefetchable(PciBarPrefetchable::Prefetchable); + + if !restoring { + self.configuration + .add_pci_bar(&bar0) + .map_err(|e| PciDeviceError::IoRegistrationFailed(bar0_addr.raw_value(), e))?; + self.configuration + .add_pci_bar(&bar2) + .map_err(|e| PciDeviceError::IoRegistrationFailed(bar2_addr.raw_value(), e))?; + } + + bars.push(bar0); + bars.push(bar2); + self.bar_regions = bars.clone(); + + Ok(bars) + } + + fn free_bars( + &mut self, + _allocator: &mut SystemAllocator, + _mmio32_allocator: &mut AddressAllocator, + _mmio64_allocator: &mut AddressAllocator, + ) -> std::result::Result<(), PciDeviceError> { + unimplemented!("Device hotplug and remove are not supported for ivshmem"); + } + + fn write_config_register( + &mut self, + reg_idx: usize, + offset: u64, + data: &[u8], + ) -> (Vec, Option>) { + ( + self.configuration + .write_config_register(reg_idx, offset, data), + None, + ) + } + + fn read_config_register(&mut self, reg_idx: usize) -> u32 { + self.configuration.read_reg(reg_idx) + } + + fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) { + debug!("read base {base:x} offset {offset}"); + + let mut bar_idx = 0; + for (idx, bar) in self.bar_regions.iter().enumerate() { + if bar.addr() == base { + bar_idx = idx; + } + } + match bar_idx { + // bar 0 + 0 => { + // ivshmem don't use interrupt, we return zero now. 
+ LittleEndian::write_u32(data, 0); + } + // bar 2 + 1 => warn!("Unexpected read ivshmem memory idx: {offset}"), + _ => { + warn!("Invalid bar_idx: {bar_idx}"); + } + }; + } + + fn write_bar(&mut self, base: u64, offset: u64, _data: &[u8]) -> Option> { + debug!("write base {base:x} offset {offset}"); + warn!("Unexpected write ivshmem memory idx: {offset}"); + None + } + + fn move_bar(&mut self, old_base: u64, new_base: u64) -> result::Result<(), std::io::Error> { + for bar in self.bar_regions.iter_mut() { + if bar.addr() == old_base { + *bar = bar.set_address(new_base); + } + } + + Ok(()) + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + fn id(&self) -> Option { + Some(self.id.clone()) + } +} + +impl Pausable for IvshmemDevice {} + +impl Snapshottable for IvshmemDevice { + fn id(&self) -> String { + self.id.clone() + } + + // The snapshot/restore (also live migration) support only work for ivshmem-plain mode. + // Additional work is needed for supporting ivshmem-doorbell. + fn snapshot(&mut self) -> std::result::Result { + let mut snapshot = Snapshot::new_from_state(&self.state())?; + + // Snapshot PciConfiguration + snapshot.add_snapshot(self.configuration.id(), self.configuration.snapshot()?); + + Ok(snapshot) + } +} + +impl Transportable for IvshmemDevice {} + +impl Migratable for IvshmemDevice {} diff --git a/devices/src/lib.rs b/devices/src/lib.rs index 6ea4bc70bb..d7ac32d9d5 100644 --- a/devices/src/lib.rs +++ b/devices/src/lib.rs @@ -24,6 +24,8 @@ pub mod gic; pub mod interrupt_controller; #[cfg(target_arch = "x86_64")] pub mod ioapic; +#[cfg(feature = "ivshmem")] +pub mod ivshmem; pub mod legacy; #[cfg(feature = "pvmemcontrol")] pub mod pvmemcontrol; @@ -33,6 +35,8 @@ pub mod pvpanic; pub mod tpm; pub use self::acpi::{AcpiGedDevice, AcpiPmTimerDevice, AcpiShutdownDevice}; +#[cfg(feature = "ivshmem")] +pub use self::ivshmem::IvshmemDevice; pub use self::pvpanic::{PvPanicDevice, PVPANIC_DEVICE_MMIO_SIZE}; bitflags! 
{ From 2c282a5a540e7d694fa3b83f3abba85c101c481d Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Thu, 29 Aug 2024 15:58:50 +0800 Subject: [PATCH 100/294] vmm: ivshmem device support Signed-off-by: Yi Wang Signed-off-by: Songqian Li --- Cargo.toml | 1 + src/main.rs | 10 +++++ vmm/Cargo.toml | 1 + vmm/src/config.rs | 89 +++++++++++++++++++++++++++++++++++++++ vmm/src/device_manager.rs | 57 +++++++++++++++++++++++++ vmm/src/lib.rs | 4 ++ vmm/src/vm_config.rs | 22 ++++++++++ 7 files changed, 184 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 89f994aafe..c621c256d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,6 +68,7 @@ fw_cfg = ["vmm/fw_cfg"] guest_debug = ["vmm/guest_debug"] igvm = ["mshv", "vmm/igvm"] io_uring = ["vmm/io_uring"] +ivshmem = ["vmm/ivshmem"] kvm = ["vmm/kvm"] mshv = ["vmm/mshv"] pvmemcontrol = ["vmm/pvmemcontrol"] diff --git a/src/main.rs b/src/main.rs index 4ba766e055..6daac338f7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -29,6 +29,8 @@ use vmm::landlock::{Landlock, LandlockError}; use vmm::vm_config; #[cfg(feature = "fw_cfg")] use vmm::vm_config::FwCfgConfig; +#[cfg(feature = "ivshmem")] +use vmm::vm_config::IvshmemConfig; #[cfg(target_arch = "x86_64")] use vmm::vm_config::SgxEpcConfig; use vmm::vm_config::{ @@ -300,6 +302,12 @@ fn get_cli_options_sorted( .help("Path to initramfs image") .num_args(1) .group("vm-config"), + #[cfg(feature = "ivshmem")] + Arg::new("ivshmem") + .long("ivshmem") + .help(IvshmemConfig::SYNTAX) + .num_args(1) + .group("vm-config"), Arg::new("kernel") .long("kernel") .help( @@ -1034,6 +1042,8 @@ mod unit_tests { preserved_fds: None, landlock_enable: false, landlock_rules: None, + #[cfg(feature = "ivshmem")] + ivshmem: None, }; assert_eq!(expected_vm_config, result_vm_config); diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 6b33e6344d..6ebabc01ae 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -12,6 +12,7 @@ fw_cfg = ["devices/fw_cfg"] guest_debug = ["gdbstub", "gdbstub_arch", "kvm"] igvm = ["dep:igvm", "hex", "igvm_defs", "mshv-bindings", "range_map_vec"] io_uring = ["block/io_uring"] +ivshmem = ["devices/ivshmem"] kvm = [ "arch/kvm", "hypervisor/kvm", diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 28d8cc6d8c..1977c8728c 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -4,6 +4,8 @@ // use std::collections::{BTreeSet, HashMap}; +#[cfg(feature = "ivshmem")] +use std::fs; use std::path::PathBuf; use std::result; use std::str::FromStr; @@ -154,9 +156,17 @@ pub enum Error { /// Failed parsing TPM device #[error("Error parsing --tpm")] ParseTpm(#[source] OptionParserError), + #[cfg(feature = "ivshmem")] + /// Failed parsing ivsmem device + #[error("Error parsing --ivshmem")] + ParseIvshmem(#[source] OptionParserError), /// Missing path for TPM device #[error("Error parsing --tpm: path missing")] ParseTpmPathMissing, + #[cfg(feature = "ivshmem")] + /// Missing path for ivsmem device + #[error("Error parsing --ivshmem: path missing")] + ParseIvshmemPathMissing, /// Error parsing Landlock rules #[error("Error parsing --landlock-rules")] ParseLandlockRules(#[source] OptionParserError), @@ -334,6 +344,18 @@ pub enum ValidationError { /// FwCfg missing initramfs #[error("Error --fw-cfg-config: missing --initramfs")] FwCfgMissingInitramfs, + #[cfg(feature = "ivshmem")] + /// Invalid Ivshmem input size + #[error("Invalid ivshmem input size")] + InvalidIvshmemInputSize(u64), + #[cfg(feature = "ivshmem")] + /// Invalid Ivshmem backend file size + #[error("Invalid ivshmem backend file size")] + InvalidIvshmemSize(u64), + 
#[cfg(feature = "ivshmem")] + /// Invalid Ivshmem backend file path + #[error("Invalid ivshmem backend file path")] + InvalidIvshmemPath, } type ValidationResult = std::result::Result; @@ -391,6 +413,8 @@ pub struct VmParams<'a> { pub landlock_rules: Option>, #[cfg(feature = "fw_cfg")] pub fw_cfg_config: Option<&'a str>, + #[cfg(feature = "ivshmem")] + pub ivshmem: Option<&'a str>, } impl<'a> VmParams<'a> { @@ -465,6 +489,8 @@ impl<'a> VmParams<'a> { #[cfg(feature = "fw_cfg")] let fw_cfg_config: Option<&str> = args.get_one::("fw-cfg-config").map(|x| x as &str); + #[cfg(feature = "ivshmem")] + let ivshmem: Option<&str> = args.get_one::("ivshmem").map(|x| x as &str); VmParams { cpus, memory, @@ -508,6 +534,8 @@ impl<'a> VmParams<'a> { landlock_rules, #[cfg(feature = "fw_cfg")] fw_cfg_config, + #[cfg(feature = "ivshmem")] + ivshmem, } } } @@ -2397,6 +2425,47 @@ impl LandlockConfig { } } +#[cfg(feature = "ivshmem")] +impl IvshmemConfig { + pub const SYNTAX: &'static str = "Ivshmem device. Specify the backend file path and size \ + for the shared memory: \"path=, size=\" \ + \nThe must be a power of 2 (e.g., 2M, 4M, etc.), as it represents the size \ + of the memory region mapped to the guest. Default size is 128M."; + pub fn parse(ivshmem: &str) -> Result { + let mut parser = OptionParser::new(); + parser.add("path").add("size"); + parser.parse(ivshmem).map_err(Error::ParseIvshmem)?; + let path = parser + .get("path") + .map(PathBuf::from) + .ok_or(Error::ParseIvshmemPathMissing)?; + let size = parser + .convert::("size") + .map_err(Error::ParseIvshmem)? + .unwrap_or(ByteSized((DEFAULT_IVSHMEM_SIZE << 20) as u64)) + .0; + Ok(IvshmemConfig { + path, + size: size as usize, + }) + } + + pub fn validate(&self) -> ValidationResult<()> { + let size = self.size as u64; + let path = &self.path; + // size must = 2^n + if !size.is_power_of_two() { + return Err(ValidationError::InvalidIvshmemInputSize(size)); + } + let metadata = fs::metadata(path.to_str().unwrap()) + .map_err(|_| ValidationError::InvalidIvshmemPath)?; + if metadata.len() < size { + return Err(ValidationError::InvalidIvshmemSize(metadata.len())); + } + Ok(()) + } +} + impl VmConfig { fn validate_identifier( id_list: &mut BTreeSet, @@ -2754,6 +2823,10 @@ impl VmConfig { landlock_rule.validate()?; } } + #[cfg(feature = "ivshmem")] + if let Some(ivshmem_config) = &self.ivshmem { + ivshmem_config.validate()?; + } Ok(id_list) } @@ -2951,6 +3024,14 @@ impl VmConfig { ); } + #[cfg(feature = "ivshmem")] + let mut ivshmem: Option = None; + #[cfg(feature = "ivshmem")] + if let Some(iv) = vm_params.ivshmem { + let ivshmem_conf = IvshmemConfig::parse(iv)?; + ivshmem = Some(ivshmem_conf); + } + let mut config = VmConfig { cpus: CpusConfig::parse(vm_params.cpus)?, memory: MemoryConfig::parse(vm_params.memory, vm_params.memory_zones)?, @@ -2986,6 +3067,8 @@ impl VmConfig { preserved_fds: None, landlock_enable: vm_params.landlock_enable, landlock_rules, + #[cfg(feature = "ivshmem")] + ivshmem, }; config.validate().map_err(Error::Validation)?; Ok(config) @@ -3115,6 +3198,8 @@ impl Clone for VmConfig { // SAFETY: FFI call with valid FDs .map(|fds| fds.iter().map(|fd| unsafe { libc::dup(*fd) }).collect()), landlock_rules: self.landlock_rules.clone(), + #[cfg(feature = "ivshmem")] + ivshmem: self.ivshmem.clone(), ..*self } } @@ -3919,6 +4004,8 @@ mod tests { ]), landlock_enable: false, landlock_rules: None, + #[cfg(feature = "ivshmem")] + ivshmem: None, }; let valid_config = RestoreConfig { @@ -4114,6 +4201,8 @@ mod tests { preserved_fds: None, 
landlock_enable: false, landlock_rules: None, + #[cfg(feature = "ivshmem")] + ivshmem: None, }; valid_config.validate().unwrap(); diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index ac1c0a406d..9f32da8ba4 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -115,6 +115,8 @@ use crate::interrupt::{LegacyUserspaceInterruptManager, MsiInterruptManager}; use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; use crate::pci_segment::PciSegment; use crate::serial_manager::{Error as SerialManagerError, SerialManager}; +#[cfg(feature = "ivshmem")] +use crate::vm_config::IvshmemConfig; use crate::vm_config::{ ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig, VhostMode, VmConfig, VsockConfig, DEFAULT_IOMMU_ADDRESS_WIDTH_BITS, @@ -140,6 +142,8 @@ const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol"; const BALLOON_DEVICE_NAME: &str = "__balloon"; const CONSOLE_DEVICE_NAME: &str = "__console"; const PVPANIC_DEVICE_NAME: &str = "__pvpanic"; +#[cfg(feature = "ivshmem")] +const IVSHMEM_DEVICE_NAME: &str = "__ivshmem"; // Devices that the user may name and for which we generate // identifiers if the user doesn't give one @@ -632,6 +636,11 @@ pub enum DeviceManagerError { #[error("Cannot create a PvPanic device")] PvPanicCreate(#[source] devices::pvpanic::PvPanicError), + #[cfg(feature = "ivshmem")] + /// Cannot create a ivshmem device + #[error("Cannot create a ivshmem device: {0}")] + IvshmemCreate(devices::ivshmem::IvshmemError), + /// Cannot create a RateLimiterGroup #[error("Cannot create a RateLimiterGroup")] RateLimiterGroupCreate(#[source] rate_limiter::group::Error), @@ -1085,6 +1094,10 @@ pub struct DeviceManager { #[cfg(feature = "fw_cfg")] fw_cfg: Option>>, + + #[cfg(feature = "ivshmem")] + // ivshmem device + ivshmem_device: Option>>, } fn create_mmio_allocators( @@ -1351,6 +1364,8 @@ impl DeviceManager { mmio_regions: Arc::new(Mutex::new(Vec::new())), #[cfg(feature = "fw_cfg")] fw_cfg: None, + #[cfg(feature = "ivshmem")] + ivshmem_device: None, }; let device_manager = Arc::new(Mutex::new(device_manager)); @@ -1474,6 +1489,11 @@ impl DeviceManager { self.pvpanic_device = self.add_pvpanic_device()?; } + #[cfg(feature = "ivshmem")] + if let Some(ivshmem) = self.config.clone().lock().unwrap().ivshmem.as_ref() { + self.ivshmem_device = self.add_ivshmem_device(ivshmem)?; + } + Ok(()) } @@ -4199,6 +4219,43 @@ impl DeviceManager { Ok(Some(pvpanic_device)) } + #[cfg(feature = "ivshmem")] + fn add_ivshmem_device( + &mut self, + ivshmem_cfg: &IvshmemConfig, + ) -> DeviceManagerResult>>> { + let id = String::from(IVSHMEM_DEVICE_NAME); + let pci_segment_id = 0x0_u16; + info!("Creating ivshmem device {}", id); + + let (pci_segment_id, pci_device_bdf, resources) = + self.pci_resources(&id, pci_segment_id)?; + let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); + + let ivshmem_device = Arc::new(Mutex::new( + devices::IvshmemDevice::new( + id.clone(), + ivshmem_cfg.size as u64, + snapshot, + ) + .map_err(DeviceManagerError::IvshmemCreate)?, + )); + let new_resources = self.add_pci_device( + ivshmem_device.clone(), + ivshmem_device.clone(), + pci_segment_id, + pci_device_bdf, + resources, + )?; + let mut node = device_node!(id, ivshmem_device); + node.resources = new_resources; + node.pci_bdf = Some(pci_device_bdf); + node.pci_device_handle = None; + self.device_tree.lock().unwrap().insert(id, node); + + Ok(Some(ivshmem_device)) + } + fn 
pci_resources( &self, id: &str, diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index f17c4b79d0..dddfe9bd33 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -423,6 +423,8 @@ pub fn feature_list() -> Vec { "tdx".to_string(), #[cfg(feature = "tracing")] "tracing".to_string(), + #[cfg(feature = "ivshmem")] + "ivshmem".to_string(), ] } @@ -2438,6 +2440,8 @@ mod unit_tests { preserved_fds: None, landlock_enable: false, landlock_rules: None, + #[cfg(feature = "ivshmem")] + ivshmem: None, }) } diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index 2d829a678a..50841eeed8 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -650,6 +650,26 @@ impl ApplyLandlock for VsockConfig { } } +#[cfg(feature = "ivshmem")] +pub const DEFAULT_IVSHMEM_SIZE: usize = 128; + +#[cfg(feature = "ivshmem")] +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct IvshmemConfig { + pub path: PathBuf, + pub size: usize, +} + +#[cfg(feature = "ivshmem")] +impl Default for IvshmemConfig { + fn default() -> Self { + Self { + path: PathBuf::new(), + size: DEFAULT_IVSHMEM_SIZE << 20, + } + } +} + #[cfg(target_arch = "x86_64")] #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] pub struct SgxEpcConfig { @@ -896,6 +916,8 @@ pub struct VmConfig { #[serde(default)] pub landlock_enable: bool, pub landlock_rules: Option>, + #[cfg(feature = "ivshmem")] + pub ivshmem: Option, } impl VmConfig { From f0febcae3da54a281cd287b2630a681ae433dddf Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Thu, 29 Aug 2024 15:59:19 +0800 Subject: [PATCH 101/294] docs: add ivshmem device introduction Signed-off-by: Songqian Li --- docs/ivshmem.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 docs/ivshmem.md diff --git a/docs/ivshmem.md b/docs/ivshmem.md new file mode 100644 index 0000000000..3a7913c3f6 --- /dev/null +++ b/docs/ivshmem.md @@ -0,0 +1,51 @@ +# Inter-VM shared memory device + +The Inter-VM shared memory device (ivshmem) is designed to share a memory +region between a guest and the host. In order for all guests to be able to +pick up the shared memory area, it is modeled as a PCI device exposing said +memory to the guest as a PCI BAR. + +Device Specification is +at https://www.qemu.org/docs/master/specs/ivshmem-spec.html. + +Now we support setting a backend file to share data between host and guest. +In other words, we only support ivshmem-plain and ivshmem-doorbell is not +supported yet. + +## Usage + +`--ivshmem`, an optional argument, can be passed to enable ivshmem device. +This argument takes a file as a `path` value and a file size as a `size` value. + +``` +--ivshmem device backend file "path=,size="; +``` + +## Example + +Create a file with a size bigger than passed to `cloud-hypervisor`: + +``` +truncate -s 1M /tmp/ivshmem.data +``` + +Start application to mmap the file data to a memory region: + +``` +./cloud-hypervisor \ + --api-socket /tmp/cloud-hypervisor.sock \ + --kernel vmlinux \ + --disk path=focal-server-cloudimg-amd64.raw \ + --cpus boot=4 \ + --memory size=1024M \ + --ivshmem path=/tmp/ivshmem.data,size=1M +``` + +Insmod a ivshmem device driver to enable the device. The file data will be +mmapped to the PCI `bar2` of ivshmem device, +guest can r/w data by accessing this memory. + +A simple example of ivshmem driver can get from: +https://github.com/lisongqian/clh-linux/commits/ch-6.12.8-ivshmem + +The host process can r/w this data by remmaping the `/tmp/ivshmem.data`. 
From a09c8329fb18d0088da855391d3b67dde26c7d4f Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Thu, 29 Aug 2024 16:26:48 +0800 Subject: [PATCH 102/294] fuzz: add ivshmem device config Signed-off-by: Songqian Li --- fuzz/Cargo.toml | 1 + fuzz/fuzz_targets/http_api.rs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 8a7e1d4849..f8fa6fa4b7 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -11,6 +11,7 @@ cargo-fuzz = true [features] default = ["mshv_emulator"] igvm = [] +ivshmem = [] mshv_emulator = ["hypervisor/mshv_emulator"] pvmemcontrol = [] diff --git a/fuzz/fuzz_targets/http_api.rs b/fuzz/fuzz_targets/http_api.rs index 5c146ad9d8..ee8fa52376 100644 --- a/fuzz/fuzz_targets/http_api.rs +++ b/fuzz/fuzz_targets/http_api.rs @@ -197,6 +197,8 @@ impl RequestHandler for StubApiRequestHandler { preserved_fds: None, landlock_enable: false, landlock_rules: None, + #[cfg(feature = "ivshmem")] + ivshmem: None, }), state: VmState::Running, memory_actual_size: 0, From 4c1ee0329e9e2b6fb632a08b31514bd2cdd9e68b Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Tue, 17 Jun 2025 15:26:28 +0800 Subject: [PATCH 103/294] tests: add ivshmem integration test case Signed-off-by: Songqian Li --- scripts/run_integration_tests_aarch64.sh | 7 + scripts/run_integration_tests_x86_64.sh | 7 + test_infra/src/lib.rs | 24 ++ tests/integration.rs | 373 ++++++++++++++++++++++- 4 files changed, 408 insertions(+), 3 deletions(-) diff --git a/scripts/run_integration_tests_aarch64.sh b/scripts/run_integration_tests_aarch64.sh index 262faff9a4..758c69c6b2 100755 --- a/scripts/run_integration_tests_aarch64.sh +++ b/scripts/run_integration_tests_aarch64.sh @@ -258,4 +258,11 @@ if [ $RES -eq 0 ]; then RES=$? fi +if [ $RES -eq 0 ]; then + cargo build --features ivshmem --all --release --target "$BUILD_TARGET" + export RUST_BACKTRACE=1 + time cargo test "ivshmem::$test_filter" --target "$BUILD_TARGET" -- ${test_binary_args[*]} + RES=$? +fi + exit $RES diff --git a/scripts/run_integration_tests_x86_64.sh b/scripts/run_integration_tests_x86_64.sh index 4f4491aa7f..3f28e23cdf 100755 --- a/scripts/run_integration_tests_x86_64.sh +++ b/scripts/run_integration_tests_x86_64.sh @@ -206,4 +206,11 @@ if [ $RES -eq 0 ]; then RES=$? fi +if [ $RES -eq 0 ]; then + cargo build --features ivshmem --all --release --target "$BUILD_TARGET" + export RUST_BACKTRACE=1 + time cargo test $test_features "ivshmem::$test_filter" --target "$BUILD_TARGET" -- ${test_binary_args[*]} + RES=$? 
+fi + exit $RES diff --git a/test_infra/src/lib.rs b/test_infra/src/lib.rs index df47de835c..6875aa5b22 100644 --- a/test_infra/src/lib.rs +++ b/test_infra/src/lib.rs @@ -1776,3 +1776,27 @@ pub fn measure_virtio_net_latency(guest: &Guest, test_timeout: u32) -> Result Option { + let devices: Vec<&str> = output.split("\n\n").collect(); + + for device in devices { + if device.contains(device_desc) { + for line in device.lines() { + let line = line.trim(); + let line_start_str = format!("Region {bar_index}: Memory at"); + // for example: Region 2: Memory at 200000000 (64-bit, non-prefetchable) [size=1M] + if line.starts_with(line_start_str.as_str()) { + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() >= 4 { + let addr_str = parts[4]; + return Some(String::from(addr_str)); + } + } + } + } + } + None +} diff --git a/tests/integration.rs b/tests/integration.rs index dc19b7aee2..d604a5f7c9 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -11,7 +11,9 @@ extern crate test_infra; use std::collections::HashMap; -use std::io::{BufRead, Read, Seek, Write}; +use std::ffi::CStr; +use std::fs::OpenOptions; +use std::io::{BufRead, Read, Seek, SeekFrom, Write}; use std::net::TcpListener; use std::os::unix::io::AsRawFd; use std::path::PathBuf; @@ -2341,6 +2343,147 @@ fn make_guest_panic(guest: &Guest) { guest.ssh_command("screen -dmS reboot sh -c \"sleep 5; echo s | tee /proc/sysrq-trigger; echo c | sudo tee /proc/sysrq-trigger\"").unwrap(); } +// ivshmem test +// This case validates that read data from host(host write data to ivshmem backend file, +// guest read data from ivshmem pci bar2 memory) +// and write data to host(guest write data to ivshmem pci bar2 memory, host read it from +// ivshmem backend file). +// It also checks the size of the shared memory region. +fn _test_ivshmem(guest: &Guest, ivshmem_file_path: String, file_size: &str) { + let test_message_read = String::from("ivshmem device test data read"); + // Modify backend file data before function test + let mut file = OpenOptions::new() + .read(true) + .write(true) + .open(ivshmem_file_path.as_str()) + .unwrap(); + file.seek(SeekFrom::Start(0)).unwrap(); + file.write_all(test_message_read.as_bytes()).unwrap(); + file.write_all(b"\0").unwrap(); + file.flush().unwrap(); + + let output = fs::read_to_string(ivshmem_file_path.as_str()).unwrap(); + let nul_pos = output.as_bytes().iter().position(|&b| b == 0).unwrap(); + let c_str = CStr::from_bytes_until_nul(&output.as_bytes()[..=nul_pos]).unwrap(); + let file_message = c_str.to_string_lossy().to_string(); + // Check if the backend file data is correct + assert_eq!(test_message_read, file_message); + + let device_id_line = String::from( + guest + .ssh_command("lspci -D | grep \"Inter-VM shared memory\"") + .unwrap() + .trim(), + ); + // Check if ivshmem exists + assert!(!device_id_line.is_empty()); + let device_id = device_id_line.split(" ").next().unwrap(); + // Check shard memory size + assert_eq!( + guest + .ssh_command( + format!("lspci -vv -s {device_id} | grep -c \"Region 2.*size={file_size}\"") + .as_str(), + ) + .unwrap() + .trim() + .parse::() + .unwrap_or_default(), + 1 + ); + + // guest don't have gcc or g++, try to use python to test :( + // This python program try to mmap the ivshmem pci bar2 memory and read the data from it. 
+ let ivshmem_test_read = format!( + r#" +import os +import mmap +from ctypes import create_string_buffer, c_char, memmove + +if __name__ == "__main__": + device_path = f"/sys/bus/pci/devices/{device_id}/resource2" + fd = os.open(device_path, os.O_RDWR | os.O_SYNC) + + PAGE_SIZE = os.sysconf('SC_PAGESIZE') + + with mmap.mmap(fd, PAGE_SIZE, flags=mmap.MAP_SHARED, + prot=mmap.PROT_READ | mmap.PROT_WRITE, offset=0) as shmem: + c_buf = (c_char * PAGE_SIZE).from_buffer(shmem) + null_pos = c_buf.raw.find(b'\x00') + valid_data = c_buf.raw[:null_pos] if null_pos != -1 else c_buf.raw + print(valid_data.decode('utf-8', errors='replace'), end="") + shmem.flush() + del c_buf + + os.close(fd) + "# + ); + guest + .ssh_command( + format!( + r#"cat << EOF > test_read.py +{ivshmem_test_read} +EOF +"# + ) + .as_str(), + ) + .unwrap(); + let guest_message = guest.ssh_command("sudo python3 test_read.py").unwrap(); + + // Check the probe message in host and guest + assert_eq!(test_message_read, guest_message); + + let test_message_write = "ivshmem device test data write"; + // Then the program writes a test message to the memory and flush it. + let ivshmem_test_write = format!( + r#" +import os +import mmap +from ctypes import create_string_buffer, c_char, memmove + +if __name__ == "__main__": + device_path = f"/sys/bus/pci/devices/{device_id}/resource2" + test_message = "{test_message_write}" + fd = os.open(device_path, os.O_RDWR | os.O_SYNC) + + PAGE_SIZE = os.sysconf('SC_PAGESIZE') + + with mmap.mmap(fd, PAGE_SIZE, flags=mmap.MAP_SHARED, + prot=mmap.PROT_READ | mmap.PROT_WRITE, offset=0) as shmem: + shmem.flush() + c_buf = (c_char * PAGE_SIZE).from_buffer(shmem) + encoded_msg = test_message.encode('utf-8').ljust(1000, b'\x00') + memmove(c_buf, encoded_msg, len(encoded_msg)) + shmem.flush() + del c_buf + + os.close(fd) + "# + ); + + guest + .ssh_command( + format!( + r#"cat << EOF > test_write.py +{ivshmem_test_write} +EOF +"# + ) + .as_str(), + ) + .unwrap(); + + let _ = guest.ssh_command("sudo python3 test_write.py").unwrap(); + + let output = fs::read_to_string(ivshmem_file_path.as_str()).unwrap(); + let nul_pos = output.as_bytes().iter().position(|&b| b == 0).unwrap(); + let c_str = CStr::from_bytes_until_nul(&output.as_bytes()[..=nul_pos]).unwrap(); + let file_message = c_str.to_string_lossy().to_string(); + // Check to send data from guest to host + assert_eq!(test_message_write, file_message); +} + mod common_parallel { use std::fs::OpenOptions; use std::io::SeekFrom; @@ -7275,6 +7418,226 @@ mod dbus_api { } } +mod ivshmem { + use std::fs::remove_dir_all; + use std::process::Command; + + use test_infra::{handle_child_output, kill_child, Guest, GuestCommand, UbuntuDiskConfig}; + + use crate::*; + + #[test] + fn test_ivshmem() { + let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string()); + let guest = Guest::new(Box::new(focal)); + let api_socket = temp_api_path(&guest.tmp_dir); + + let kernel_path = direct_kernel_boot_path(); + + let ivshmem_file_path = String::from( + guest + .tmp_dir + .as_path() + .join("ivshmem.data") + .to_str() + .unwrap(), + ); + let file_size = "1M"; + + // Create a file to be used as the shared memory + Command::new("dd") + .args([ + "if=/dev/zero", + format!("of={ivshmem_file_path}").as_str(), + format!("bs={file_size}").as_str(), + "count=1", + ]) + .status() + .unwrap(); + + let mut child = GuestCommand::new(&guest) + .args(["--cpus", "boot=2"]) + .args(["--memory", "size=512M"]) + .args(["--kernel", kernel_path.to_str().unwrap()]) + .args(["--cmdline", 
DIRECT_KERNEL_BOOT_CMDLINE]) + .default_disks() + .default_net() + .args([ + "--ivshmem", + format!("path={ivshmem_file_path},size={file_size}").as_str(), + ]) + .args(["--api-socket", &api_socket]) + .capture_output() + .spawn() + .unwrap(); + + let r = std::panic::catch_unwind(|| { + guest.wait_vm_boot(None).unwrap(); + _test_ivshmem(&guest, ivshmem_file_path, file_size); + }); + kill_child(&mut child); + let output = child.wait_with_output().unwrap(); + + handle_child_output(r, &output); + } + + #[test] + fn test_snapshot_restore_ivshmem() { + let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string()); + let guest = Guest::new(Box::new(focal)); + let kernel_path = direct_kernel_boot_path(); + + let api_socket_source = format!("{}.1", temp_api_path(&guest.tmp_dir)); + + let ivshmem_file_path = String::from( + guest + .tmp_dir + .as_path() + .join("ivshmem.data") + .to_str() + .unwrap(), + ); + let file_size = "1M"; + + let device_params = { + let mut data = vec![]; + // Create a file to be used as the shared memory + Command::new("dd") + .args([ + "if=/dev/zero", + format!("of={ivshmem_file_path}").as_str(), + format!("bs={file_size}").as_str(), + "count=1", + ]) + .status() + .unwrap(); + data.push(String::from("--ivshmem")); + data.push(format!("path={ivshmem_file_path},size={file_size}")); + data + }; + + let socket = temp_vsock_path(&guest.tmp_dir); + let event_path = temp_event_monitor_path(&guest.tmp_dir); + + let mut child = GuestCommand::new(&guest) + .args(["--api-socket", &api_socket_source]) + .args(["--event-monitor", format!("path={event_path}").as_str()]) + .args(["--cpus", "boot=2"]) + .args(["--memory", "size=1G"]) + .args(["--kernel", kernel_path.to_str().unwrap()]) + .default_disks() + .default_net() + .args(["--vsock", format!("cid=3,socket={socket}").as_str()]) + .args(["--cmdline", DIRECT_KERNEL_BOOT_CMDLINE]) + .args(device_params) + .capture_output() + .spawn() + .unwrap(); + + let console_text = String::from("On a branch floating down river a cricket, singing."); + // Create the snapshot directory + let snapshot_dir = temp_snapshot_dir_path(&guest.tmp_dir); + + let r = std::panic::catch_unwind(|| { + guest.wait_vm_boot(None).unwrap(); + + // Check the number of vCPUs + assert_eq!(guest.get_cpu_count().unwrap_or_default(), 2); + + common_sequential::snapshot_and_check_events( + &api_socket_source, + &snapshot_dir, + &event_path, + ); + }); + + // Shutdown the source VM and check console output + kill_child(&mut child); + let output = child.wait_with_output().unwrap(); + handle_child_output(r, &output); + + // Remove the vsock socket file. 
+ Command::new("rm") + .arg("-f") + .arg(socket.as_str()) + .output() + .unwrap(); + + let api_socket_restored = format!("{}.2", temp_api_path(&guest.tmp_dir)); + let event_path_restored = format!("{}.2", temp_event_monitor_path(&guest.tmp_dir)); + + // Restore the VM from the snapshot + let mut child = GuestCommand::new(&guest) + .args(["--api-socket", &api_socket_restored]) + .args([ + "--event-monitor", + format!("path={event_path_restored}").as_str(), + ]) + .args([ + "--restore", + format!("source_url=file://{snapshot_dir}").as_str(), + ]) + .capture_output() + .spawn() + .unwrap(); + + // Wait for the VM to be restored + thread::sleep(std::time::Duration::new(20, 0)); + + let latest_events = [&MetaEvent { + event: "restored".to_string(), + device_id: None, + }]; + assert!(check_latest_events_exact( + &latest_events, + &event_path_restored + )); + + // Remove the snapshot dir + let _ = remove_dir_all(snapshot_dir.as_str()); + + let r = std::panic::catch_unwind(|| { + // Resume the VM + assert!(remote_command(&api_socket_restored, "resume", None)); + // There is no way that we can ensure the 'write()' to the + // event file is completed when the 'resume' request is + // returned successfully, because the 'write()' was done + // asynchronously from a different thread of Cloud + // Hypervisor (e.g. the event-monitor thread). + thread::sleep(std::time::Duration::new(1, 0)); + let latest_events = [ + &MetaEvent { + event: "resuming".to_string(), + device_id: None, + }, + &MetaEvent { + event: "resumed".to_string(), + device_id: None, + }, + ]; + assert!(check_latest_events_exact( + &latest_events, + &event_path_restored + )); + + // Check the number of vCPUs + assert_eq!(guest.get_cpu_count().unwrap_or_default(), 2); + guest.check_devices_common(Some(&socket), Some(&console_text), None); + _test_ivshmem(&guest, ivshmem_file_path, file_size); + }); + // Shutdown the target VM and check console output + kill_child(&mut child); + let output = child.wait_with_output().unwrap(); + handle_child_output(r, &output); + + let r = std::panic::catch_unwind(|| { + assert!(String::from_utf8_lossy(&output.stdout).contains(&console_text)); + }); + + handle_child_output(r, &output); + } +} + mod common_sequential { use std::fs::remove_dir_all; @@ -7286,7 +7649,11 @@ mod common_sequential { test_memory_mergeable(true) } - fn snapshot_and_check_events(api_socket: &str, snapshot_dir: &str, event_path: &str) { + pub(crate) fn snapshot_and_check_events( + api_socket: &str, + snapshot_dir: &str, + event_path: &str, + ) { // Pause the VM assert!(remote_command(api_socket, "pause", None)); let latest_events: [&MetaEvent; 2] = [ @@ -7833,7 +8200,7 @@ mod common_sequential { let device_params = { let mut data = vec![]; if pvpanic { - data.push("--pvpanic"); + data.push(String::from("--pvpanic")); } data }; From 51e095c54d056e1b3f9ece325b2edbd8066a3baf Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Tue, 17 Jun 2025 15:26:45 +0800 Subject: [PATCH 104/294] ci: add build and quality check for ivshmem feature Signed-off-by: Songqian Li --- .github/workflows/build.yaml | 3 +++ .github/workflows/quality.yaml | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index c78750f78d..428740dcd0 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -55,6 +55,9 @@ jobs: - name: Build (default features + fw_cfg) run: cargo rustc --locked --bin cloud-hypervisor --features "fw_cfg" -- -D warnings -D clippy::undocumented_unsafe_blocks -W 
clippy::assertions_on_result_states + - name: Build (default features + ivshmem) + run: cargo rustc --locked --bin cloud-hypervisor --features "ivshmem" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + - name: Build (mshv) run: cargo rustc --locked --bin cloud-hypervisor --no-default-features --features "mshv" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index fb733b1a09..bdd407b765 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -117,6 +117,16 @@ jobs: use-cross: ${{ matrix.target != 'x86_64-unknown-linux-gnu' }} command: clippy args: --target=${{ matrix.target }} --locked --all --all-targets --tests --examples --features "fw_cfg" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + + - name: Clippy (default features + ivshmem) + uses: houseabsolute/actions-rust-cross@v1 + with: + command: clippy + cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 + toolchain: ${{ matrix.rust }} + target: ${{ matrix.target }} + args: --locked --all --all-targets --tests --examples --features "ivshmem" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + - name: Clippy (sev_snp) if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} uses: houseabsolute/actions-rust-cross@v1 From 5128ee9ba6443f985a596624bcf6d15c24973852 Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Tue, 8 Jul 2025 11:53:16 +0800 Subject: [PATCH 105/294] devices, vmm: Handle ivshmem bar reprogramming properly Signed-off-by: Bo Chen Signed-off-by: Songqian Li --- devices/src/ivshmem.rs | 99 ++++++++++++++++++++++++++++++-------- vmm/src/device_manager.rs | 88 ++++++++++++++++++++++++++++++++- vmm/src/seccomp_filters.rs | 1 - 3 files changed, 165 insertions(+), 23 deletions(-) diff --git a/devices/src/ivshmem.rs b/devices/src/ivshmem.rs index 6e9ef4293e..fc7c88de31 100644 --- a/devices/src/ivshmem.rs +++ b/devices/src/ivshmem.rs @@ -23,7 +23,6 @@ use vm_device::{BusDevice, Resource, UserspaceMapping}; use vm_memory::bitmap::AtomicBitmap; use vm_memory::{Address, GuestAddress}; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; -use vmm_sys_util::eventfd::EventFd; const IVSHMEM_BAR0_IDX: usize = 0; const IVSHMEM_BAR1_IDX: usize = 1; @@ -42,6 +41,14 @@ pub enum IvshmemError { RetrievePciConfigurationState(#[source] anyhow::Error), #[error("Failed to retrieve IvshmemDeviceState: {0}")] RetrieveIvshmemDeviceStateState(#[source] anyhow::Error), + #[error("Failed to remove user memory region")] + RemoveUserMemoryRegion, + #[error("Failed to create user memory region.")] + CreateUserMemoryRegion, + #[error("Failed to create userspace mapping.")] + CreateUserspaceMapping, + #[error("Failed to remove old userspace mapping.")] + RemoveUserspaceMapping, } #[derive(Copy, Clone)] @@ -55,23 +62,41 @@ impl PciSubclass for IvshmemSubclass { } } +pub trait IvshmemOps: Send + Sync { + fn map_ram_region( + &mut self, + start_addr: u64, + size: usize, + backing_file: Option, + ) -> Result<(Arc, UserspaceMapping), IvshmemError>; + + fn unmap_ram_region(&mut self, mapping: UserspaceMapping) -> Result<(), IvshmemError>; +} + +/// Inner-Vm Shared Memory Device (Ivshmem device) +/// +/// This device can share memory between host and guest(ivshmem-plain) +/// and share memory between guests(ivshmem-doorbell). 
+/// But only ivshmem-plain support now, ivshmem-doorbell doesn't support yet. pub struct IvshmemDevice { id: String, // ivshmem device registers - interrupt_mask: u32, - interrupt_status: Arc, - iv_position: u32, - doorbell: u32, + // (only used for ivshmem-doorbell, ivshmem-doorbell don't support yet) + _interrupt_mask: u32, + _interrupt_status: Arc, + _iv_position: u32, + _doorbell: u32, // PCI configuration registers. configuration: PciConfiguration, bar_regions: Vec, - region: Option>, region_size: u64, + ivshmem_ops: Arc>, + backend_file: Option, + region: Option>, userspace_mapping: Option, - reprogram_evt: EventFd, } #[derive(Serialize, Deserialize, Default, Clone)] @@ -86,13 +111,14 @@ impl IvshmemDevice { pub fn new( id: String, region_size: u64, + backend_file: Option, + ivshmem_ops: Arc>, snapshot: Option, ) -> Result { let pci_configuration_state = vm_migration::state_from_id(snapshot.as_ref(), PCI_CONFIGURATION_ID).map_err(|e| { IvshmemError::RetrievePciConfigurationState(anyhow!( "Failed to get PciConfigurationState from Snapshot: {e}", - e )) })?; @@ -125,31 +151,44 @@ impl IvshmemDevice { id, configuration, bar_regions: vec![], - interrupt_mask: s.interrupt_mask, - interrupt_status: Arc::new(AtomicU32::new(s.interrupt_status)), - iv_position: s.iv_position, - doorbell: s.doorbell, + _interrupt_mask: s.interrupt_mask, + _interrupt_status: Arc::new(AtomicU32::new(s.interrupt_status)), + _iv_position: s.iv_position, + _doorbell: s.doorbell, region_size, + ivshmem_ops, region: None, userspace_mapping: None, + backend_file, } } else { IvshmemDevice { id, configuration, bar_regions: vec![], - interrupt_mask: 0, - interrupt_status: Arc::new(AtomicU32::new(0)), - iv_position: 0, - doorbell: 0, + _interrupt_mask: 0, + _interrupt_status: Arc::new(AtomicU32::new(0)), + _iv_position: 0, + _doorbell: 0, region_size, + ivshmem_ops, region: None, userspace_mapping: None, + backend_file, } }; Ok(device) } + pub fn set_region( + &mut self, + region: Arc, + userspace_mapping: UserspaceMapping, + ) { + self.region = Some(region); + self.userspace_mapping = Some(userspace_mapping); + } + pub fn config_bar_addr(&self) -> u64 { self.configuration.get_bar_addr(IVSHMEM_BAR0_IDX) } @@ -160,10 +199,10 @@ impl IvshmemDevice { fn state(&self) -> IvshmemDeviceState { IvshmemDeviceState { - interrupt_mask: self.interrupt_mask, - interrupt_status: self.interrupt_status.load(Ordering::SeqCst), - iv_position: self.iv_position, - doorbell: self.doorbell, + interrupt_mask: self._interrupt_mask, + interrupt_status: self._interrupt_status.load(Ordering::SeqCst), + iv_position: self._iv_position, + doorbell: self._doorbell, } } } @@ -319,6 +358,26 @@ impl PciDevice for IvshmemDevice { } fn move_bar(&mut self, old_base: u64, new_base: u64) -> result::Result<(), std::io::Error> { + if new_base == self.data_bar_addr() { + if let Some(old_mapping) = self.userspace_mapping.take() { + self.ivshmem_ops + .lock() + .unwrap() + .unmap_ram_region(old_mapping) + .map_err(std::io::Error::other)?; + } + let (region, new_mapping) = self + .ivshmem_ops + .lock() + .unwrap() + .map_ram_region( + new_base, + self.region_size as usize, + self.backend_file.clone(), + ) + .map_err(std::io::Error::other)?; + self.set_region(region, new_mapping); + } for bar in self.bar_regions.iter_mut() { if bar.addr() == old_base { *bar = bar.set_address(new_base); diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 9f32da8ba4..c097a25974 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -53,6 +53,8 
@@ use devices::gic; use devices::interrupt_controller::InterruptController; #[cfg(target_arch = "x86_64")] use devices::ioapic; +#[cfg(feature = "ivshmem")] +use devices::ivshmem::{IvshmemError, IvshmemOps}; #[cfg(all(feature = "fw_cfg", target_arch = "x86_64"))] use devices::legacy::fw_cfg::FW_CFG_ACPI_ID; #[cfg(target_arch = "aarch64")] @@ -639,7 +641,7 @@ pub enum DeviceManagerError { #[cfg(feature = "ivshmem")] /// Cannot create a ivshmem device #[error("Cannot create a ivshmem device: {0}")] - IvshmemCreate(devices::ivshmem::IvshmemError), + IvshmemCreate(IvshmemError), /// Cannot create a RateLimiterGroup #[error("Cannot create a RateLimiterGroup")] @@ -4232,11 +4234,16 @@ impl DeviceManager { self.pci_resources(&id, pci_segment_id)?; let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); + let ivshmem_ops = Arc::new(Mutex::new(IvshmemHandler { + memory_manager: self.memory_manager.clone(), + })); let ivshmem_device = Arc::new(Mutex::new( devices::IvshmemDevice::new( id.clone(), ivshmem_cfg.size as u64, - snapshot, + Some(ivshmem_cfg.path.clone()), + ivshmem_ops.clone(), + snapshot, ) .map_err(DeviceManagerError::IvshmemCreate)?, )); @@ -4247,6 +4254,15 @@ impl DeviceManager { pci_device_bdf, resources, )?; + + let start_addr = ivshmem_device.lock().unwrap().data_bar_addr(); + let (region, mapping) = ivshmem_ops + .lock() + .unwrap() + .map_ram_region(start_addr, ivshmem_cfg.size, Some(ivshmem_cfg.path.clone())) + .map_err(DeviceManagerError::IvshmemCreate)?; + ivshmem_device.lock().unwrap().set_region(region, mapping); + let mut node = device_node!(id, ivshmem_device); node.resources = new_resources; node.pci_bdf = Some(pci_device_bdf); @@ -4929,6 +4945,74 @@ impl DeviceManager { } } +#[cfg(feature = "ivshmem")] +struct IvshmemHandler { + memory_manager: Arc>, +} + +#[cfg(feature = "ivshmem")] +impl IvshmemOps for IvshmemHandler { + fn map_ram_region( + &mut self, + start_addr: u64, + size: usize, + backing_file: Option, + ) -> Result<(Arc, UserspaceMapping), IvshmemError> { + info!("Creating ivshmem mem region at 0x{:x}", start_addr); + + let region = MemoryManager::create_ram_region( + &backing_file, + 0, + GuestAddress(start_addr), + size, + false, + true, + false, + None, + None, + None, + false, + ) + .map_err(|_| IvshmemError::CreateUserMemoryRegion)?; + let mem_slot = self + .memory_manager + .lock() + .unwrap() + .create_userspace_mapping( + region.start_addr().0, + region.len(), + region.as_ptr() as u64, + false, + false, + false, + ) + .map_err(|_| IvshmemError::CreateUserspaceMapping)?; + let mapping = UserspaceMapping { + host_addr: region.as_ptr() as u64, + mem_slot, + addr: GuestAddress(region.start_addr().0), + len: region.len(), + mergeable: false, + }; + Ok((region, mapping)) + } + + fn unmap_ram_region(&mut self, mapping: UserspaceMapping) -> Result<(), IvshmemError> { + self.memory_manager + .lock() + .unwrap() + .remove_userspace_mapping( + mapping.addr.raw_value(), + mapping.len, + mapping.host_addr, + mapping.mergeable, + mapping.mem_slot, + ) + .map_err(|_| IvshmemError::RemoveUserspaceMapping)?; + Ok(()) + } +} + fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option { for (numa_node_id, numa_node) in numa_nodes.iter() { if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) { diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index 40748f0d0b..1f0a6a47e4 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -835,7 +835,6 @@ fn vcpu_thread_rules( 
(libc::SYS_unlinkat, vec![]), (libc::SYS_write, vec![]), (libc::SYS_writev, vec![]), - #[cfg(debug_assertions)] (libc::SYS_fcntl, vec![]), (libc::SYS_getcwd, vec![]), ]) From 9011ff21617d797de0d832078fd00f54558034ed Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Mon, 7 Jul 2025 23:18:37 +0800 Subject: [PATCH 106/294] tests: add ivshmem live migration test case Signed-off-by: Songqian Li --- tests/integration.rs | 235 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 216 insertions(+), 19 deletions(-) diff --git a/tests/integration.rs b/tests/integration.rs index d604a5f7c9..13e612485b 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -7426,6 +7426,191 @@ mod ivshmem { use crate::*; + fn _test_live_migration_ivshmem(local: bool) { + let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string()); + let guest = Guest::new(Box::new(focal)); + let kernel_path = direct_kernel_boot_path(); + let console_text = String::from("On a branch floating down river a cricket, singing."); + let net_id = "net123"; + let net_params = format!( + "id={},tap=,mac={},ip={},mask=255.255.255.0", + net_id, guest.network.guest_mac, guest.network.host_ip + ); + + let memory_param: &[&str] = if local { + &["--memory", "size=4G,shared=on"] + } else { + &["--memory", "size=4G"] + }; + + let boot_vcpus = 2; + let max_vcpus = 4; + + let pmem_temp_file = TempFile::new().unwrap(); + pmem_temp_file.as_file().set_len(128 << 20).unwrap(); + std::process::Command::new("mkfs.ext4") + .arg(pmem_temp_file.as_path()) + .output() + .expect("Expect creating disk image to succeed"); + let pmem_path = String::from("/dev/pmem0"); + + let ivshmem_file_path = String::from( + guest + .tmp_dir + .as_path() + .join("ivshmem.data") + .to_str() + .unwrap(), + ); + let file_size = "1M"; + + // Create a file to be used as the shared memory + Command::new("dd") + .args([ + "if=/dev/zero", + format!("of={ivshmem_file_path}").as_str(), + format!("bs={file_size}").as_str(), + "count=1", + ]) + .status() + .unwrap(); + + // Start the source VM + let src_vm_path = clh_command("cloud-hypervisor"); + let src_api_socket = temp_api_path(&guest.tmp_dir); + let mut src_vm_cmd = GuestCommand::new_with_binary_path(&guest, &src_vm_path); + src_vm_cmd + .args([ + "--cpus", + format!("boot={boot_vcpus},max={max_vcpus}").as_str(), + ]) + .args(memory_param) + .args(["--kernel", kernel_path.to_str().unwrap()]) + .args(["--cmdline", DIRECT_KERNEL_BOOT_CMDLINE]) + .default_disks() + .args(["--net", net_params.as_str()]) + .args(["--api-socket", &src_api_socket]) + .args([ + "--pmem", + format!("file={}", pmem_temp_file.as_path().to_str().unwrap(),).as_str(), + ]) + .args([ + "--ivshmem", + format!("path={ivshmem_file_path},size={file_size}").as_str(), + ]); + let mut src_child = src_vm_cmd.capture_output().spawn().unwrap(); + + // Start the destination VM + let mut dest_api_socket = temp_api_path(&guest.tmp_dir); + dest_api_socket.push_str(".dest"); + let mut dest_child = GuestCommand::new(&guest) + .args(["--api-socket", &dest_api_socket]) + .capture_output() + .spawn() + .unwrap(); + + let r = std::panic::catch_unwind(|| { + guest.wait_vm_boot(None).unwrap(); + + // Make sure the source VM is functional + // Check the number of vCPUs + assert_eq!(guest.get_cpu_count().unwrap_or_default(), boot_vcpus); + // Check the guest RAM + assert!(guest.get_total_memory().unwrap_or_default() > 3_840_000); + // Check the guest virtio-devices, e.g. 
block, rng, console, and net + guest.check_devices_common(None, Some(&console_text), Some(&pmem_path)); + // x86_64: Following what's done in the `test_snapshot_restore`, we need + // to make sure that removing and adding back the virtio-net device does + // not break the live-migration support for virtio-pci. + #[cfg(target_arch = "x86_64")] + { + assert!(remote_command( + &src_api_socket, + "remove-device", + Some(net_id), + )); + thread::sleep(Duration::new(10, 0)); + + // Plug the virtio-net device again + assert!(remote_command( + &src_api_socket, + "add-net", + Some(net_params.as_str()), + )); + thread::sleep(Duration::new(10, 0)); + } + + // Check ivshmem device in src guest. + _test_ivshmem(&guest, ivshmem_file_path.clone(), file_size); + // Allow some normal time to elapse to check we don't get spurious reboots + thread::sleep(std::time::Duration::new(40, 0)); + + // Start the live-migration + let migration_socket = String::from( + guest + .tmp_dir + .as_path() + .join("live-migration.sock") + .to_str() + .unwrap(), + ); + + assert!( + live_migration::start_live_migration( + &migration_socket, + &src_api_socket, + &dest_api_socket, + local + ), + "Unsuccessful command: 'send-migration' or 'receive-migration'." + ); + }); + + // Check and report any errors occurred during the live-migration + if r.is_err() { + live_migration::print_and_panic( + src_child, + dest_child, + None, + "Error occurred during live-migration", + ); + } + + // Check the source vm has been terminated successful (give it '3s' to settle) + thread::sleep(std::time::Duration::new(3, 0)); + if !src_child.try_wait().unwrap().is_some_and(|s| s.success()) { + live_migration::print_and_panic( + src_child, + dest_child, + None, + "source VM was not terminated successfully.", + ); + }; + + // Post live-migration check to make sure the destination VM is functional + let r = std::panic::catch_unwind(|| { + // Perform same checks to validate VM has been properly migrated + assert_eq!(guest.get_cpu_count().unwrap_or_default(), boot_vcpus); + assert!(guest.get_total_memory().unwrap_or_default() > 3_840_000); + + guest.check_devices_common(None, Some(&console_text), Some(&pmem_path)); + + // Check ivshmem device + _test_ivshmem(&guest, ivshmem_file_path, file_size); + }); + + // Clean-up the destination VM and make sure it terminated correctly + let _ = dest_child.kill(); + let dest_output = dest_child.wait_with_output().unwrap(); + handle_child_output(r, &dest_output); + + // Check the destination VM has the expected 'console_text' from its output + let r = std::panic::catch_unwind(|| { + assert!(String::from_utf8_lossy(&dest_output.stdout).contains(&console_text)); + }); + handle_child_output(r, &dest_output); + } + #[test] fn test_ivshmem() { let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string()); @@ -7499,22 +7684,16 @@ mod ivshmem { ); let file_size = "1M"; - let device_params = { - let mut data = vec![]; - // Create a file to be used as the shared memory - Command::new("dd") - .args([ - "if=/dev/zero", - format!("of={ivshmem_file_path}").as_str(), - format!("bs={file_size}").as_str(), - "count=1", - ]) - .status() - .unwrap(); - data.push(String::from("--ivshmem")); - data.push(format!("path={ivshmem_file_path},size={file_size}")); - data - }; + // Create a file to be used as the shared memory + Command::new("dd") + .args([ + "if=/dev/zero", + format!("of={ivshmem_file_path}").as_str(), + format!("bs={file_size}").as_str(), + "count=1", + ]) + .status() + .unwrap(); let socket = temp_vsock_path(&guest.tmp_dir); 
let event_path = temp_event_monitor_path(&guest.tmp_dir); @@ -7529,7 +7708,10 @@ mod ivshmem { .default_net() .args(["--vsock", format!("cid=3,socket={socket}").as_str()]) .args(["--cmdline", DIRECT_KERNEL_BOOT_CMDLINE]) - .args(device_params) + .args([ + "--ivshmem", + format!("path={ivshmem_file_path},size={file_size}").as_str(), + ]) .capture_output() .spawn() .unwrap(); @@ -7636,6 +7818,16 @@ mod ivshmem { handle_child_output(r, &output); } + + #[test] + fn test_live_migration_ivshmem() { + _test_live_migration_ivshmem(false) + } + + #[test] + fn test_live_migration_ivshmem_local() { + _test_live_migration_ivshmem(true) + } } mod common_sequential { @@ -9566,7 +9758,7 @@ mod vfio { mod live_migration { use crate::*; - fn start_live_migration( + pub fn start_live_migration( migration_socket: &str, src_api_socket: &str, dest_api_socket: &str, @@ -9652,7 +9844,12 @@ mod live_migration { send_success && receive_success } - fn print_and_panic(src_vm: Child, dest_vm: Child, ovs_vm: Option, message: &str) -> ! { + pub fn print_and_panic( + src_vm: Child, + dest_vm: Child, + ovs_vm: Option, + message: &str, + ) -> ! { let mut src_vm = src_vm; let mut dest_vm = dest_vm; From 78799187e8ff219f901b5b5c21193b8f49a212e5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 14 Aug 2025 23:50:24 +0000 Subject: [PATCH 107/294] build: Bump event-listener from 5.4.0 to 5.4.1 Bumps [event-listener](https://github.com/smol-rs/event-listener) from 5.4.0 to 5.4.1. - [Release notes](https://github.com/smol-rs/event-listener/releases) - [Changelog](https://github.com/smol-rs/event-listener/blob/master/CHANGELOG.md) - [Commits](https://github.com/smol-rs/event-listener/compare/v5.4.0...v5.4.1) --- updated-dependencies: - dependency-name: event-listener dependency-version: 5.4.1 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dede7fb8d4..6da3de4dc6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -662,9 +662,9 @@ dependencies = [ [[package]] name = "event-listener" -version = "5.4.0" +version = "5.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3492acde4c3fc54c845eaab3eed8bd00c7a7d881f78bfc801e43a93dec1331ae" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" dependencies = [ "concurrent-queue", "parking", From 92f415ea3f27c6e2a4ae7e5365bb69ef8ea6c1b0 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 15 Aug 2025 09:42:27 +0200 Subject: [PATCH 108/294] build: Bump MSRV to 1.88 This is necessary to use the let-chains feature in a follow-up. After upgrading to Rust edition 2024, clippy wants to collapse various if's with let-chains. Update image to 20250815-0 since MSRV in Dockerfile is updated. 
Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- .github/workflows/build.yaml | 2 +- .github/workflows/docker-image.yaml | 2 +- .github/workflows/preview-riscv64.yaml | 2 +- .github/workflows/release.yaml | 2 +- Cargo.toml | 2 +- resources/Dockerfile | 4 ++-- scripts/dev_cli.sh | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 428740dcd0..286c2af548 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -15,7 +15,7 @@ jobs: - stable - beta - nightly - - "1.87.0" + - "1.88.0" target: - x86_64-unknown-linux-gnu - x86_64-unknown-linux-musl diff --git a/.github/workflows/docker-image.yaml b/.github/workflows/docker-image.yaml index 3e5f56458c..a026eac2c6 100644 --- a/.github/workflows/docker-image.yaml +++ b/.github/workflows/docker-image.yaml @@ -41,7 +41,7 @@ jobs: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} # generate Docker tags based on the following events/attributes tags: | - type=raw,value=20250807-0 + type=raw,value=20250815-0 type=sha - name: Build and push diff --git a/.github/workflows/preview-riscv64.yaml b/.github/workflows/preview-riscv64.yaml index 02f796b590..767d9779a8 100644 --- a/.github/workflows/preview-riscv64.yaml +++ b/.github/workflows/preview-riscv64.yaml @@ -24,7 +24,7 @@ jobs: fetch-depth: 0 - name: Install Rust toolchain - run: /opt/scripts/exec-in-qemu.sh rustup default 1.87.0 + run: /opt/scripts/exec-in-qemu.sh rustup default 1.88.0 - name: Build ${{ matrix.module }} Module (kvm) run: /opt/scripts/exec-in-qemu.sh cargo rustc --locked -p ${{ matrix.module }} --no-default-features --features "kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 44842475d1..4876fd5d98 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -45,7 +45,7 @@ jobs: target: ${{ matrix.platform.target }} args: ${{ matrix.platform.args }} strip: true - toolchain: "1.87.0" + toolchain: "1.88.0" - name: Copy Release Binaries if: github.event_name == 'create' && github.event.ref_type == 'tag' shell: bash diff --git a/Cargo.toml b/Cargo.toml index c621c256d4..9714ab4f87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ version = "47.0.0" # a.) A dependency requires it, # b.) If we want to use a new feature and that MSRV is at least 6 months old, # c.) There is a security issue that is addressed by the toolchain update. 
-rust-version = "1.87.0" +rust-version = "1.88.0" [profile.release] codegen-units = 1 diff --git a/resources/Dockerfile b/resources/Dockerfile index a650c48211..704e8602f3 100644 --- a/resources/Dockerfile +++ b/resources/Dockerfile @@ -8,7 +8,7 @@ FROM ubuntu:24.04 AS dev ARG TARGETARCH -ARG RUST_TOOLCHAIN="1.87.0" +ARG RUST_TOOLCHAIN="1.88.0" ARG CLH_SRC_DIR="/cloud-hypervisor" ARG CLH_BUILD_DIR="$CLH_SRC_DIR/build" ARG CARGO_REGISTRY_DIR="$CLH_BUILD_DIR/cargo_registry" @@ -123,7 +123,7 @@ RUN echo 'source $CARGO_HOME/env' >> "$HOME"/.bashrc \ && mkdir "$HOME"/.cargo \ && ln -s $CARGO_HOME/env "$HOME"/.cargo/env -# Allow pip to install packages system wide +# Allow pip to install packages system wide # hadolint ignore=DL3003,SC2046 RUN rm /usr/lib/python3.12/EXTERNALLY-MANAGED \ && git clone https://github.com/spdk/spdk \ diff --git a/scripts/dev_cli.sh b/scripts/dev_cli.sh index 3349afa9d8..257889ae5b 100755 --- a/scripts/dev_cli.sh +++ b/scripts/dev_cli.sh @@ -9,7 +9,7 @@ CLI_NAME="Cloud Hypervisor" CTR_IMAGE_TAG="ghcr.io/cloud-hypervisor/cloud-hypervisor" # Needs to match explicit version in docker-image.yaml workflow -CTR_IMAGE_VERSION="20250807-0" +CTR_IMAGE_VERSION="20250815-0" : "${CTR_IMAGE:=${CTR_IMAGE_TAG}:${CTR_IMAGE_VERSION}}" DOCKER_RUNTIME="docker" From dac0638fe8aaa4125024c7cbf4993a36bf06cc79 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 15 Aug 2025 10:31:06 +0200 Subject: [PATCH 109/294] misc: fix clippy Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- arch/src/riscv64/fdt.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/src/riscv64/fdt.rs b/arch/src/riscv64/fdt.rs index 580aaa7d3f..ee453eb2fc 100644 --- a/arch/src/riscv64/fdt.rs +++ b/arch/src/riscv64/fdt.rs @@ -119,7 +119,7 @@ fn create_cpu_nodes(fdt: &mut FdtWriter, num_cpus: u32) -> FdtWriterResult<()> { fdt.property_u32("timebase-frequency", timebase_frequency)?; for cpu_index in 0..num_cpus { - let cpu = fdt.begin_node(&format!("cpu@{:x}", cpu_index))?; + let cpu = fdt.begin_node(&format!("cpu@{cpu_index:x}"))?; fdt.property_string("device_type", "cpu")?; fdt.property_string("compatible", "riscv")?; fdt.property_string("mmu-type", "sv48")?; @@ -184,7 +184,7 @@ fn create_memory_node(fdt: &mut FdtWriter, guest_mem: &GuestMemoryMmap) -> FdtWr } let ram_start = super::layout::RAM_START.raw_value(); - let memory_node_name = format!("memory@{:x}", ram_start); + let memory_node_name = format!("memory@{ram_start:x}"); let memory_node = fdt.begin_node(&memory_node_name)?; fdt.property_string("device_type", "memory")?; fdt.property_array_u64("reg", &mem_reg_property)?; From dd8687aebbae67e1fcf9a4c2b1063c2ecbd60d27 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Wed, 25 Jun 2025 13:49:05 +0200 Subject: [PATCH 110/294] vmm: add enum PayloadConfigError validation to improve error reporting Currently, the following scenarios are supported by Cloud Hypervisor to bootstrap a VM: 1. provide firmware 2. provide kernel 3. provide kernel + cmdline 4. provide kernel + initrd 5. provide kernel + cmdline + initrd As the difference between `--firmware` and `--kernel` is not very clear currently, especially as both use/support a Xen PVH entry, adding this helps to identify the cause of misconfiguration. 
Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- README.md | 14 +++++++++++++- vmm/src/config.rs | 17 +++++++++++------ vmm/src/vm.rs | 17 ++++++----------- vmm/src/vm_config.rs | 45 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 4609903d0f..fdb18255f0 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ interface will be enabled as per `network-config` details. $ sudo setcap cap_net_admin+ep ./cloud-hypervisor $ ./create-cloud-init.sh $ ./cloud-hypervisor \ - --kernel ./hypervisor-fw \ + --firmware ./hypervisor-fw \ --disk path=focal-server-cloudimg-amd64.raw path=/tmp/ubuntu-cloudinit.img \ --cpus boot=4 \ --memory size=1024M \ @@ -175,6 +175,18 @@ $ ./cloud-hypervisor \ --console off ``` +## Booting: `--firmware` vs `--kernel` + +The following scenarios are supported by Cloud Hypervisor to bootstrap a VM, i.e., +to load a payload/bootitem(s): + +- Provide firmware +- Provide kernel \[+ cmdline\]\ [+ initrd\] + +Please note that our Cloud Hypervisor firmware (`hypervisor-fw`) has a Xen PVH +boot entry, therefore it can also be booted via the `--kernel` parameter, as +seen in some examples. + ### Custom Kernel and Disk Image #### Building your Kernel diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 1977c8728c..06523761ad 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -181,9 +181,6 @@ pub enum Error { #[derive(Debug, PartialEq, Eq, Error)] pub enum ValidationError { - /// No kernel specified - #[error("No kernel specified")] - KernelMissing, /// Missing file value for console #[error("Path missing when using file console mode")] ConsoleFileMissing, @@ -356,6 +353,8 @@ pub enum ValidationError { /// Invalid Ivshmem backend file path #[error("Invalid ivshmem backend file path")] InvalidIvshmemPath, + #[error("Payload configuration is not bootable")] + PayloadError(#[from] PayloadConfigError), } type ValidationResult = std::result::Result; @@ -2507,9 +2506,13 @@ impl VmConfig { pub fn validate(&mut self) -> ValidationResult> { let mut id_list = BTreeSet::new(); + // Is the payload configuration bootable? self.payload - .as_ref() - .ok_or(ValidationError::KernelMissing)?; + .as_mut() + .ok_or(ValidationError::PayloadError( + PayloadConfigError::MissingBootitem, + ))? 
+ .validate()?; #[cfg(feature = "tdx")] { @@ -4216,7 +4219,9 @@ mod tests { invalid_config.payload = None; assert_eq!( invalid_config.validate(), - Err(ValidationError::KernelMissing) + Err(ValidationError::PayloadError( + PayloadConfigError::MissingBootitem + )) ); let mut invalid_config = valid_config.clone(); diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 7e8851194d..347b39dc31 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -317,9 +317,6 @@ pub enum Error { #[error("Error joining kernel loading thread")] KernelLoadThreadJoin(std::boxed::Box), - #[error("Payload configuration is not bootable")] - InvalidPayload, - #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] #[error("Error coredumping VM")] Coredump(#[source] GuestDebuggableError), @@ -1217,7 +1214,7 @@ impl Vm { Self::load_firmware(&firmware, memory_manager)?; arch::layout::UEFI_START } - _ => return Err(Error::InvalidPayload), + _ => unreachable!("Unsupported boot configuration: programming error from 'PayloadConfigError::validate()'"), }; Ok(EntryPoint { entry_addr }) @@ -1267,7 +1264,7 @@ impl Vm { Self::load_firmware(&firmware, memory_manager)?; arch::layout::UEFI_START } - _ => return Err(Error::InvalidPayload), + _ => unreachable!("Unsupported boot configuration: programming error from 'PayloadConfigError::validate()'"), }; Ok(EntryPoint { entry_addr }) @@ -1385,19 +1382,17 @@ impl Vm { match ( &payload.firmware, &payload.kernel, - &payload.initramfs, - &payload.cmdline, ) { - (Some(firmware), None, None, None) => { + (Some(firmware), None) => { let firmware = File::open(firmware).map_err(Error::FirmwareFile)?; Self::load_kernel(firmware, None, memory_manager) } - (None, Some(kernel), _, _) => { + (None, Some(kernel)) => { let kernel = File::open(kernel).map_err(Error::KernelFile)?; let cmdline = Self::generate_cmdline(payload)?; Self::load_kernel(kernel, Some(cmdline), memory_manager) } - _ => Err(Error::InvalidPayload), + _ => unreachable!("Unsupported boot configuration: programming error from 'PayloadConfigError::validate()'"), } } @@ -1415,7 +1410,7 @@ impl Vm { let kernel = File::open(kernel).map_err(Error::KernelFile)?; Self::load_kernel(None, Some(kernel), memory_manager) } - _ => Err(Error::InvalidPayload), + _ => unreachable!("Unsupported boot configuration: programming error from 'PayloadConfigError::validate()'"), } } diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index 50841eeed8..cf1f61e05c 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -10,6 +10,7 @@ use std::{fs, result}; use net_util::MacAddr; use serde::{Deserialize, Serialize}; +use thiserror::Error; use virtio_devices::RateLimiterConfig; use crate::landlock::LandlockError; @@ -705,6 +706,21 @@ pub struct NumaConfig { pub pci_segments: Option>, } +/// Errors describing a misconfigured payload, i.e., a configuration that +/// can't be booted by Cloud Hypervisor. +/// +/// This typically is the case for invalid combinations of cmdline, kernel, +/// firmware, and initrd. +#[derive(Debug, Error, PartialEq, Eq)] +pub enum PayloadConfigError { + /// Specifying a kernel is not supported when a firmware is provided. + #[error("Specifying a kernel is not supported when a firmware is provided")] + FirmwarePlusOtherPayloads, + /// No bootitem provided: neither firmware nor kernel. 
+ #[error("No bootitem provided: neither firmware nor kernel")] + MissingBootitem, +} + #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct PayloadConfig { #[serde(default)] @@ -796,6 +812,35 @@ impl FromStr for FwCfgItemList { } } +impl PayloadConfig { + /// Validates the payload config. + /// + /// Succeeds if Cloud Hypervisor will be able to boot the configuration. + /// Further, warns for some odd configurations. + pub fn validate(&mut self) -> Result<(), PayloadConfigError> { + match (&self.firmware, &self.kernel) { + (Some(_firmware), Some(_kernel)) => Err(PayloadConfigError::FirmwarePlusOtherPayloads), + (Some(_firmware), None) => { + if self.cmdline.is_some() { + log::warn!("Ignoring cmdline parameter as firmware is provided as the payload"); + self.cmdline = None; + } + if self.initramfs.is_some() { + log::warn!( + "Ignoring initramfs parameter as firmware is provided as the payload" + ); + self.initramfs = None; + } + Ok(()) + } + (None, Some(_kernel)) => Ok(()), + (None, None) => Err(PayloadConfigError::MissingBootitem), + }?; + + Ok(()) + } +} + impl ApplyLandlock for PayloadConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { // Payload only needs read access From 2c7d6be3f42ff0c108b8b33b0af88f400e063625 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Fri, 15 Aug 2025 01:50:42 +0000 Subject: [PATCH 111/294] arch: don't construct mptable on x86_64 if too many CPUs MP table is a legacy device that is incompatible with x2apic CPU IDs exceeding 254. The Linux kernel is perfectly happy without MP table in these cases. Signed-off-by: Barret Rhoden Signed-off-by: Neel Natu Signed-off-by: Ofir Weisse Signed-off-by: Peter Oskolkov --- arch/src/x86_64/mod.rs | 6 +++++- arch/src/x86_64/mptable.rs | 31 +++++++++++++------------------ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index a4c0b88bf1..27e1375d8a 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -33,6 +33,10 @@ use std::arch::x86_64; #[cfg(feature = "tdx")] pub mod tdx; +// While modern architectures support more than 255 CPUs via x2APIC, +// legacy devices such as mptable support at most 254 CPUs. +pub(crate) const MAX_SUPPORTED_CPUS_LEGACY: u32 = 254; + // CPUID feature bits #[cfg(feature = "kvm")] const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // tsc deadline timer ecx bit. @@ -915,7 +919,7 @@ pub fn configure_vcpu( // does not recognize the last vCPU if x2apic is not enabled when // there are 256 vCPUs in a flat hierarchy (i.e. max x2apic ID is 255), // so we need to enable x2apic in this case as well. - let enable_x2_apic_mode = get_max_x2apic_id(topology) >= 255; + let enable_x2_apic_mode = get_max_x2apic_id(topology) > MAX_SUPPORTED_CPUS_LEGACY; regs::setup_sregs(&guest_memory.memory(), vcpu, enable_x2_apic_mode) .map_err(Error::SregsConfiguration)?; } diff --git a/arch/src/x86_64/mptable.rs b/arch/src/x86_64/mptable.rs index d709a0043c..d688e41374 100644 --- a/arch/src/x86_64/mptable.rs +++ b/arch/src/x86_64/mptable.rs @@ -11,6 +11,7 @@ use libc::c_uchar; use thiserror::Error; use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryError}; +use super::MAX_SUPPORTED_CPUS_LEGACY; use crate::layout::{APIC_START, HIGH_RAM_START, IOAPIC_START}; use crate::x86_64::{get_x2apic_id, mpspec}; use crate::GuestMemoryMmap; @@ -61,9 +62,6 @@ pub enum Error { /// Failure while zeroing out the memory for the MP table. 
#[error("Failure while zeroing out the memory for the MP table")] Clear(#[source] GuestMemoryError), - /// Number of CPUs exceeds the maximum supported CPUs - #[error("Number of CPUs exceeds the maximum supported CPUs")] - TooManyCpus, /// Failure to write the MP floating pointer. #[error("Failure to write the MP floating pointer")] WriteMpfIntel(#[source] GuestMemoryError), @@ -89,11 +87,6 @@ pub enum Error { pub type Result = result::Result; -// With APIC/xAPIC, there are only 255 APIC IDs available. And IOAPIC occupies -// one APIC ID, so only 254 CPUs at maximum may be supported. Actually it's -// a large number for FC usecases. -pub const MAX_SUPPORTED_CPUS: u32 = 254; - // Most of these variables are sourced from the Intel MP Spec 1.4. const SMP_MAGIC_IDENT: &[c_uchar; 4] = b"_MP_"; const MPC_SIGNATURE: &[c_uchar; 4] = b"PCMP"; @@ -141,8 +134,9 @@ pub fn setup_mptable( if num_cpus > 0 { let cpu_id_max = num_cpus - 1; let x2apic_id_max = get_x2apic_id(cpu_id_max, topology); - if x2apic_id_max >= MAX_SUPPORTED_CPUS { - return Err(Error::TooManyCpus); + if x2apic_id_max >= MAX_SUPPORTED_CPUS_LEGACY { + info!("Skipping mptable creation due to too many CPUs"); + return Ok(()); } } @@ -157,7 +151,7 @@ pub fn setup_mptable( } let mut checksum: u8 = 0; - let ioapicid: u8 = MAX_SUPPORTED_CPUS as u8 + 1; + let ioapicid: u8 = MAX_SUPPORTED_CPUS_LEGACY as u8 + 1; // The checked_add here ensures the all of the following base_mp.unchecked_add's will be without // overflow. @@ -392,11 +386,13 @@ mod tests { #[test] fn cpu_entry_count() { - let mem = - GuestMemoryMmap::from_ranges(&[(MPTABLE_START, compute_mp_size(MAX_SUPPORTED_CPUS))]) - .unwrap(); + let mem = GuestMemoryMmap::from_ranges(&[( + MPTABLE_START, + compute_mp_size(MAX_SUPPORTED_CPUS_LEGACY), + )]) + .unwrap(); - for i in 0..MAX_SUPPORTED_CPUS { + for i in 0..MAX_SUPPORTED_CPUS_LEGACY { setup_mptable(MPTABLE_START, &mem, i, None).unwrap(); let mpf_intel: MpfIntelWrapper = mem.read_obj(MPTABLE_START).unwrap(); @@ -426,10 +422,9 @@ mod tests { #[test] fn cpu_entry_count_max() { - let cpus = MAX_SUPPORTED_CPUS + 1; + let cpus = MAX_SUPPORTED_CPUS_LEGACY + 1; let mem = GuestMemoryMmap::from_ranges(&[(MPTABLE_START, compute_mp_size(cpus))]).unwrap(); - let result = setup_mptable(MPTABLE_START, &mem, cpus, None); - result.unwrap_err(); + setup_mptable(MPTABLE_START, &mem, cpus, None).unwrap(); } } From a13200102144a9d7642e25a8a9301ad238db4e23 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 15 Aug 2025 23:28:16 +0000 Subject: [PATCH 112/294] build: Bump smallvec from 1.13.2 to 1.15.1 Bumps [smallvec](https://github.com/servo/rust-smallvec) from 1.13.2 to 1.15.1. - [Release notes](https://github.com/servo/rust-smallvec/releases) - [Commits](https://github.com/servo/rust-smallvec/compare/v1.13.2...v1.15.1) --- updated-dependencies: - dependency-name: smallvec dependency-version: 1.15.1 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- block/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6da3de4dc6..346f40fada 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1983,9 +1983,9 @@ checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" [[package]] name = "smallvec" -version = "1.13.2" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "spin" diff --git a/block/Cargo.toml b/block/Cargo.toml index f1e102e922..aac824a004 100644 --- a/block/Cargo.toml +++ b/block/Cargo.toml @@ -16,7 +16,7 @@ libc = { workspace = true } log = { workspace = true } remain = "0.2.15" serde = { workspace = true, features = ["derive"] } -smallvec = "1.13.2" +smallvec = "1.15.1" thiserror = { workspace = true } uuid = { workspace = true, features = ["v4"] } virtio-bindings = { workspace = true } From c34baac6497a29809dde5af0c1ca26fa963d3db0 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Mon, 18 Aug 2025 11:34:22 +0200 Subject: [PATCH 113/294] build: fix warnings from latest nightly (1.91) The underlying problem currently causes unrelated PRs to fail. This commit fixes that. Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- pci/src/msix.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pci/src/msix.rs b/pci/src/msix.rs index f323a69179..718c4f83ee 100644 --- a/pci/src/msix.rs +++ b/pci/src/msix.rs @@ -211,7 +211,7 @@ impl MsixConfig { } pub fn read_table(&self, offset: u64, data: &mut [u8]) { - assert!((data.len() == 4 || data.len() == 8)); + assert!(data.len() == 4 || data.len() == 8); let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; @@ -264,7 +264,7 @@ impl MsixConfig { } pub fn write_table(&mut self, offset: u64, data: &[u8]) { - assert!((data.len() == 4 || data.len() == 8)); + assert!(data.len() == 4 || data.len() == 8); let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; @@ -360,7 +360,7 @@ impl MsixConfig { } pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) { - assert!((data.len() == 4 || data.len() == 8)); + assert!(data.len() == 4 || data.len() == 8); let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize; let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO; From 09cf8fba5b2ecca48347a789e24b66496d0678fc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Aug 2025 09:42:49 +0000 Subject: [PATCH 114/294] build: Bump bitflags from 2.9.0 to 2.9.2 Bumps [bitflags](https://github.com/bitflags/bitflags) from 2.9.0 to 2.9.2. - [Release notes](https://github.com/bitflags/bitflags/releases) - [Changelog](https://github.com/bitflags/bitflags/blob/main/CHANGELOG.md) - [Commits](https://github.com/bitflags/bitflags/compare/2.9.0...2.9.2) --- updated-dependencies: - dependency-name: bitflags dependency-version: 2.9.2 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 28 ++++++++++++++-------------- Cargo.toml | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 346f40fada..167390122b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -299,9 +299,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.0" +version = "2.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +checksum = "6a65b545ab31d687cff52899d4890855fec459eb6afe0da6417b8a18da87aa29" [[package]] name = "block" @@ -527,7 +527,7 @@ dependencies = [ "anyhow", "arch", "bitfield-struct", - "bitflags 2.9.0", + "bitflags 2.9.2", "byteorder", "event_monitor", "hypervisor", @@ -640,7 +640,7 @@ version = "4.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74351c3392ea1ff6cd2628e0042d268ac2371cb613252ff383b6dfa50d22fa79" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.2", "libc", ] @@ -829,7 +829,7 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d66e32caf5dd59f561be0143e413e01d651bd8498eb9aa0be8c482c81c8d31" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.2", "cfg-if", "log", "managed", @@ -1077,7 +1077,7 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b702df98508cb63ad89dd9beb9f6409761b30edca10d48e57941d3f11513a006" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.2", "kvm-bindings", "libc", "vmm-sys-util", @@ -1112,7 +1112,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.2", "libc", ] @@ -1293,7 +1293,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.2", "cfg-if", "cfg_aliases", "libc", @@ -1834,7 +1834,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.2", "errno", "libc", "linux-raw-sys 0.4.15", @@ -1847,7 +1847,7 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.2", "errno", "libc", "linux-raw-sys 0.9.4", @@ -2259,7 +2259,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed81c5ed8224d468a322e923777ed0615cad433fe61177126098af995f89cecf" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.2", "libc", "log", "serde", @@ -2277,7 +2277,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a4dcad85a129d97d5d4b2f3c47a4affdeedd76bdcd02094bcb5d9b76cac2d05" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.2", "libc", "uuid", "vm-memory", @@ -2450,7 +2450,7 @@ dependencies = [ "acpi_tables", "anyhow", "arch", - "bitflags 2.9.0", + "bitflags 2.9.2", "block", "blocking", "cfg-if", @@ -2852,7 +2852,7 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 9714ab4f87..21e9cf1348 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -136,7 +136,7 @@ serde_with = { version = "3.14.0", default-features = false } # other crates anyhow = "1.0.98" -bitflags = "2.9.0" +bitflags = "2.9.2" byteorder = "1.5.0" cfg-if = "1.0.0" clap = "4.5.13" From e454bb629a7073316876ef3b3869cf047c1728ec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Aug 2025 11:48:51 +0000 Subject: [PATCH 115/294] build: Bump mintex from 0.1.3 to 0.1.4 Bumps [mintex](https://github.com/garypen/mintex) from 0.1.3 to 0.1.4. - [Commits](https://github.com/garypen/mintex/compare/v0.1.3...v0.1.4) --- updated-dependencies: - dependency-name: mintex dependency-version: 0.1.4 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 167390122b..6f6256c9fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1219,9 +1219,9 @@ dependencies = [ [[package]] name = "mintex" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bec4598fddb13cc7b528819e697852653252b760f1228b7642679bf2ff2cd07" +checksum = "c505b3e17ed6b70a7ed2e67fbb2c560ee327353556120d6e72f5232b6880d536" [[package]] name = "mshv-bindings" From 91d15c331d534bb2a1337283ce6302f74a528fd2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Aug 2025 17:18:19 +0000 Subject: [PATCH 116/294] build: Bump signal-hook-registry from 1.4.2 to 1.4.6 Bumps [signal-hook-registry](https://github.com/vorner/signal-hook) from 1.4.2 to 1.4.6. - [Changelog](https://github.com/vorner/signal-hook/blob/master/CHANGELOG.md) - [Commits](https://github.com/vorner/signal-hook/compare/registry-v1.4.2...registry-v1.4.6) --- updated-dependencies: - dependency-name: signal-hook-registry dependency-version: 1.4.6 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6f6256c9fa..a18feab93b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1968,9 +1968,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.2" +version = "1.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" dependencies = [ "libc", ] From 3bff69734e9940e166b9387ba210c0b444b66e20 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 02:06:49 +0000 Subject: [PATCH 117/294] build: Bump crate-ci/typos from 1.35.4 to 1.35.5 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.35.4 to 1.35.5. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.35.4...v1.35.5) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-version: 1.35.5 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/quality.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index bdd407b765..317fa84150 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -167,4 +167,4 @@ jobs: steps: - uses: actions/checkout@v5 # Executes "typos ." - - uses: crate-ci/typos@v1.35.4 + - uses: crate-ci/typos@v1.35.5 From 34c5a081aa234a2e7963c365d051074471644162 Mon Sep 17 00:00:00 2001 From: Ruoqing He Date: Tue, 19 Aug 2025 20:25:55 +0000 Subject: [PATCH 118/294] ci: Enable kvm build test on RISC-V Enable kvm build test and clippy test on RISC-V 64-bit platform to ensure whole projects builds properly. Signed-off-by: Ruoqing He --- .github/workflows/preview-riscv64-build.yaml | 30 +++++++++++++++++++ ...cv64.yaml => preview-riscv64-modules.yaml} | 0 2 files changed, 30 insertions(+) create mode 100644 .github/workflows/preview-riscv64-build.yaml rename .github/workflows/{preview-riscv64.yaml => preview-riscv64-modules.yaml} (100%) diff --git a/.github/workflows/preview-riscv64-build.yaml b/.github/workflows/preview-riscv64-build.yaml new file mode 100644 index 0000000000..929a60147a --- /dev/null +++ b/.github/workflows/preview-riscv64-build.yaml @@ -0,0 +1,30 @@ +name: Cloud Hypervisor RISC-V 64-bit kvm build Preview +on: [pull_request, merge_group] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + name: Cargo + runs-on: riscv64-qemu-host + strategy: + fail-fast: false + + steps: + - name: Code checkout + uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Install Rust toolchain + run: /opt/scripts/exec-in-qemu.sh rustup default 1.88.0 + + - name: Build test (kvm) + run: /opt/scripts/exec-in-qemu.sh cargo rustc --locked --no-default-features --features "kvm" + + - name: Clippy test (kvm) + run: /opt/scripts/exec-in-qemu.sh cargo clippy --locked --no-default-features --features "kvm" + + - name: Check no files were modified + run: test -z "$(git status --porcelain)" diff --git a/.github/workflows/preview-riscv64.yaml b/.github/workflows/preview-riscv64-modules.yaml similarity index 100% rename from .github/workflows/preview-riscv64.yaml rename to .github/workflows/preview-riscv64-modules.yaml From c1f4df600fc79d5b2f807472218b90f63bc34e40 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Wed, 20 Aug 2025 11:40:48 +0200 Subject: [PATCH 119/294] vmm: cpu_manager: massively accelerate .pause() With 254 vCPUs, pausing now takes ~4ms instead of >254ms. This improvement is visible when running `ch-remote pause` and is particularly important for live migration, where every millisecond of downtime matters. For the wait logic, it is fine to stick to the approach of sleeping 1ms on the first missed ACK as: 1) we have to wait anyway 2) we give time to the OS, enabling it to schedule a vCPU thread next Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- vmm/src/cpu.rs | 56 +++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index 629b0152b6..0f4ac18cc8 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -639,13 +639,25 @@ impl VcpuState { self.handle.is_some() } + /// Sends a signal to the underlying thread. + /// + /// Please call [`Self::wait_until_signal_acknowledged`] afterward to block + /// until the vCPU thread has acknowledged the signal. 
fn signal_thread(&self) { if let Some(handle) = self.handle.as_ref() { + // SAFETY: FFI call with correct arguments + unsafe { + libc::pthread_kill(handle.as_pthread_t() as _, SIGRTMIN()); + } + } + } + + /// Blocks until the vCPU thread has acknowledged the signal. + /// + /// This is the counterpart of [`Self::signal_thread`]. + fn wait_until_signal_acknowledged(&self) { + if let Some(_handle) = self.handle.as_ref() { loop { - // SAFETY: FFI call with correct arguments - unsafe { - libc::pthread_kill(handle.as_pthread_t() as _, SIGRTMIN()); - } if self.vcpu_run_interrupted.load(Ordering::SeqCst) { break; } else { @@ -1299,6 +1311,7 @@ impl CpuManager { let state = &mut self.vcpu_states[usize::try_from(cpu_id).unwrap()]; state.kill.store(true, Ordering::SeqCst); state.signal_thread(); + state.wait_until_signal_acknowledged(); state.join_thread()?; state.handle = None; @@ -1366,6 +1379,21 @@ impl CpuManager { } } + /// Signal to the spawned threads (vCPUs and console signal handler). + /// + /// For the vCPU threads this will interrupt the KVM_RUN ioctl() allowing + /// the loop to check the shared state booleans. + fn signal_vcpus(&self) { + // Splitting this into two loops reduced the time to pause many vCPUs + // massively. Example: 254 vCPUs. >254ms -> ~4ms. + for state in self.vcpu_states.iter() { + state.signal_thread(); + } + for state in self.vcpu_states.iter() { + state.wait_until_signal_acknowledged(); + } + } + pub fn shutdown(&mut self) -> Result<()> { // Tell the vCPUs to stop themselves next time they go through the loop self.vcpus_kill_signalled.store(true, Ordering::SeqCst); @@ -1378,12 +1406,7 @@ impl CpuManager { state.unpark_thread(); } - // Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads - // this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set - // above. - for state in self.vcpu_states.iter() { - state.signal_thread(); - } + self.signal_vcpus(); // Wait for all the threads to finish. This removes the state from the vector. for mut state in self.vcpu_states.drain(..) { @@ -1934,11 +1957,7 @@ impl CpuManager { pub(crate) fn nmi(&self) -> Result<()> { self.vcpus_kick_signalled.store(true, Ordering::SeqCst); - - for state in self.vcpu_states.iter() { - state.signal_thread(); - } - + self.signal_vcpus(); self.vcpus_kick_signalled.store(false, Ordering::SeqCst); Ok(()) @@ -2300,12 +2319,7 @@ impl Pausable for CpuManager { // Tell the vCPUs to pause themselves next time they exit self.vcpus_pause_signalled.store(true, Ordering::SeqCst); - // Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads - // this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set - // above. - for state in self.vcpu_states.iter() { - state.signal_thread(); - } + self.signal_vcpus(); for vcpu in self.vcpus.iter() { let mut vcpu = vcpu.lock().unwrap(); From 1a63b4b2ffaa21ae5c42739ccf790478b807e8a1 Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Mon, 18 Aug 2025 19:13:52 +0000 Subject: [PATCH 120/294] vmm: Consolidate 'load_firmware/kernel' for aarch64 and riscv Both functions are defined separately for the two architecture with minor differences. * `load_firmware()`: call `arch::uefi::load_uefi` which are available on both architecture; * `load_kernel()`: manually align to `arch::layout::KERNEL_START` 2MB for both architecture (e.g. 
no-op for `aarch64`); Signed-off-by: Bo Chen --- arch/src/lib.rs | 2 +- vmm/src/vm.rs | 57 +++++-------------------------------------------- 2 files changed, 6 insertions(+), 53 deletions(-) diff --git a/arch/src/lib.rs b/arch/src/lib.rs index cbeb37f51d..aff58ffe31 100644 --- a/arch/src/lib.rs +++ b/arch/src/lib.rs @@ -97,7 +97,7 @@ pub mod riscv64; pub use riscv64::{ arch_memory_regions, configure_system, configure_vcpu, fdt::DeviceInfoForFdt, get_host_cpu_phys_bits, initramfs_load_addr, layout, layout::CMDLINE_MAX_SIZE, - layout::IRQ_BASE, EntryPoint, _NSIG, + layout::IRQ_BASE, uefi, EntryPoint, _NSIG, }; #[cfg(target_arch = "x86_64")] diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 347b39dc31..938bec8752 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -1172,64 +1172,16 @@ impl Vm { Ok(cmdline) } - #[cfg(target_arch = "aarch64")] - fn load_firmware(mut firmware: &File, memory_manager: Arc>) -> Result<()> { - let uefi_flash = memory_manager.lock().as_ref().unwrap().uefi_flash(); - let mem = uefi_flash.memory(); - arch::aarch64::uefi::load_uefi(mem.deref(), arch::layout::UEFI_START, &mut firmware) - .map_err(Error::UefiLoad)?; - Ok(()) - } - - #[cfg(target_arch = "aarch64")] - fn load_kernel( - firmware: Option, - kernel: Option, - memory_manager: Arc>, - ) -> Result { - let guest_memory = memory_manager.lock().as_ref().unwrap().guest_memory(); - let mem = guest_memory.memory(); - let entry_addr = match (firmware, kernel) { - (None, Some(mut kernel)) => { - match linux_loader::loader::pe::PE::load( - mem.deref(), - Some(arch::layout::KERNEL_START), - &mut kernel, - None, - ) { - Ok(entry_addr) => entry_addr.kernel_load, - // Try to load the binary as kernel PE file at first. - // If failed, retry to load it as UEFI binary. - // As the UEFI binary is formatless, it must be the last option to try. 
- Err(linux_loader::loader::Error::Pe(InvalidImageMagicNumber)) => { - Self::load_firmware(&kernel, memory_manager)?; - arch::layout::UEFI_START - } - Err(e) => { - return Err(Error::KernelLoad(e)); - } - } - } - (Some(firmware), None) => { - Self::load_firmware(&firmware, memory_manager)?; - arch::layout::UEFI_START - } - _ => unreachable!("Unsupported boot configuration: programming error from 'PayloadConfigError::validate()'"), - }; - - Ok(EntryPoint { entry_addr }) - } - - #[cfg(target_arch = "riscv64")] + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] fn load_firmware(mut firmware: &File, memory_manager: Arc>) -> Result<()> { let uefi_flash = memory_manager.lock().as_ref().unwrap().uefi_flash(); let mem = uefi_flash.memory(); - arch::riscv64::uefi::load_uefi(mem.deref(), arch::layout::UEFI_START, &mut firmware) + arch::uefi::load_uefi(mem.deref(), arch::layout::UEFI_START, &mut firmware) .map_err(Error::UefiLoad)?; Ok(()) } - #[cfg(target_arch = "riscv64")] + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] fn load_kernel( firmware: Option, kernel: Option, @@ -1238,7 +1190,8 @@ impl Vm { let guest_memory = memory_manager.lock().as_ref().unwrap().guest_memory(); let mem = guest_memory.memory(); let alignment = 0x20_0000; - let aligned_kernel_addr = arch::layout::KERNEL_START.0 + (alignment - 1) & !(alignment - 1); + let aligned_kernel_addr = + (arch::layout::KERNEL_START.0 + (alignment - 1)) & !(alignment - 1); let entry_addr = match (firmware, kernel) { (None, Some(mut kernel)) => { match linux_loader::loader::pe::PE::load( From 2c6426460efdacb2ce60b2aba13a6463e097d9ae Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 12 Aug 2025 12:39:38 +0200 Subject: [PATCH 121/294] vmm: harmonize bootpath across architectures On aarch64 and RISC-V, calling load_firmware() through load_kernel() provides no benefit and only duplicates checks already performed in load_payload(). load_payload() now directly invokes load_firmware() or load_kernel(), removing unnecessary indirection and redundancy. 
Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- vmm/src/vm.rs | 60 +++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 938bec8752..af1ddaaa46 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -1173,18 +1173,22 @@ impl Vm { } #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] - fn load_firmware(mut firmware: &File, memory_manager: Arc>) -> Result<()> { + fn load_firmware( + mut firmware: &File, + memory_manager: Arc>, + ) -> Result { let uefi_flash = memory_manager.lock().as_ref().unwrap().uefi_flash(); let mem = uefi_flash.memory(); arch::uefi::load_uefi(mem.deref(), arch::layout::UEFI_START, &mut firmware) .map_err(Error::UefiLoad)?; - Ok(()) + Ok(EntryPoint { + entry_addr: arch::layout::UEFI_START, + }) } #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] fn load_kernel( - firmware: Option, - kernel: Option, + mut kernel: File, memory_manager: Arc>, ) -> Result { let guest_memory = memory_manager.lock().as_ref().unwrap().guest_memory(); @@ -1192,32 +1196,25 @@ impl Vm { let alignment = 0x20_0000; let aligned_kernel_addr = (arch::layout::KERNEL_START.0 + (alignment - 1)) & !(alignment - 1); - let entry_addr = match (firmware, kernel) { - (None, Some(mut kernel)) => { - match linux_loader::loader::pe::PE::load( - mem.deref(), - Some(GuestAddress(aligned_kernel_addr)), - &mut kernel, - None, - ) { - Ok(entry_addr) => entry_addr.kernel_load, - // Try to load the binary as kernel PE file at first. - // If failed, retry to load it as UEFI binary. - // As the UEFI binary is formatless, it must be the last option to try. - Err(linux_loader::loader::Error::Pe(InvalidImageMagicNumber)) => { - Self::load_firmware(&kernel, memory_manager)?; - arch::layout::UEFI_START - } - Err(e) => { - return Err(Error::KernelLoad(e)); - } + let entry_addr = { + match linux_loader::loader::pe::PE::load( + mem.deref(), + Some(GuestAddress(aligned_kernel_addr)), + &mut kernel, + None, + ) { + Ok(entry_addr) => entry_addr.kernel_load, + // Try to load the binary as kernel PE file at first. + // If failed, retry to load it as UEFI binary. + // As the UEFI binary is formatless, it must be the last option to try. + Err(linux_loader::loader::Error::Pe(InvalidImageMagicNumber)) => { + Self::load_firmware(&kernel, memory_manager)?; + arch::layout::UEFI_START + } + Err(e) => { + return Err(Error::KernelLoad(e)); } } - (Some(firmware), None) => { - Self::load_firmware(&firmware, memory_manager)?; - arch::layout::UEFI_START - } - _ => unreachable!("Unsupported boot configuration: programming error from 'PayloadConfigError::validate()'"), }; Ok(EntryPoint { entry_addr }) @@ -1254,6 +1251,9 @@ impl Vm { Ok(entry_point) } + /// Loads the kernel or a firmware file. + /// + /// For x86_64, the boot path is the same. 
#[cfg(target_arch = "x86_64")] fn load_kernel( mut kernel: File, @@ -1357,11 +1357,11 @@ impl Vm { match (&payload.firmware, &payload.kernel) { (Some(firmware), None) => { let firmware = File::open(firmware).map_err(Error::FirmwareFile)?; - Self::load_kernel(Some(firmware), None, memory_manager) + Self::load_firmware(&firmware, memory_manager) } (None, Some(kernel)) => { let kernel = File::open(kernel).map_err(Error::KernelFile)?; - Self::load_kernel(None, Some(kernel), memory_manager) + Self::load_kernel(kernel, memory_manager) } _ => unreachable!("Unsupported boot configuration: programming error from 'PayloadConfigError::validate()'"), } From 3864230430a44d9981ea22275b5ae47f1561e1f4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 21 Aug 2025 15:36:19 +0000 Subject: [PATCH 122/294] build: Bump autocfg from 1.4.0 to 1.5.0 Bumps [autocfg](https://github.com/cuviper/autocfg) from 1.4.0 to 1.5.0. - [Commits](https://github.com/cuviper/autocfg/compare/1.4.0...1.5.0) --- updated-dependencies: - dependency-name: autocfg dependency-version: 1.5.0 dependency-type: indirect update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a18feab93b..3ac9fdb4dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -261,9 +261,9 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "autocfg" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "backtrace" From b8be33dff7694f983ac3d1eafc75c1065eb691c6 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 17 Jun 2025 12:37:13 +0200 Subject: [PATCH 123/294] net_util: tap: remove needless copy One can call `to_vec()` anyway if one needs an owned copy. This change further helps to prevent needless copies in upcoming changes. Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- net_util/src/open_tap.rs | 2 +- net_util/src/tap.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net_util/src/open_tap.rs b/net_util/src/open_tap.rs index e711529ca7..e02e3b5e8f 100644 --- a/net_util/src/open_tap.rs +++ b/net_util/src/open_tap.rs @@ -135,7 +135,7 @@ pub fn open_tap( // same device. tap = open_tap_rx_q_0(if_name, ip_addr, netmask, host_mac, mtu, num_rx_q, flags)?; // Set the name of the tap device we open in subsequent iterations. 
- ifname = String::from_utf8(tap.get_if_name()).unwrap(); + ifname = String::from_utf8(tap.get_if_name().to_vec()).unwrap(); } else { tap = Tap::open_named(ifname.as_str(), num_rx_q, flags).map_err(Error::TapOpen)?; diff --git a/net_util/src/tap.rs b/net_util/src/tap.rs index 1dc0b7f486..bc841de119 100644 --- a/net_util/src/tap.rs +++ b/net_util/src/tap.rs @@ -481,8 +481,8 @@ impl Tap { ifreq } - pub fn get_if_name(&self) -> Vec { - self.if_name.clone() + pub fn get_if_name(&self) -> &[u8] { + &self.if_name } #[cfg(fuzzing)] From a51998605a3d003a59366e0203b1735d533b5521 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 27 Jun 2025 14:09:15 +0200 Subject: [PATCH 124/294] net_util: add Tap::if_name_as_str Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- net_util/src/open_tap.rs | 2 +- net_util/src/tap.rs | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/net_util/src/open_tap.rs b/net_util/src/open_tap.rs index e02e3b5e8f..21c48f8392 100644 --- a/net_util/src/open_tap.rs +++ b/net_util/src/open_tap.rs @@ -135,7 +135,7 @@ pub fn open_tap( // same device. tap = open_tap_rx_q_0(if_name, ip_addr, netmask, host_mac, mtu, num_rx_q, flags)?; // Set the name of the tap device we open in subsequent iterations. - ifname = String::from_utf8(tap.get_if_name().to_vec()).unwrap(); + ifname = tap.if_name_as_str().to_string(); } else { tap = Tap::open_named(ifname.as_str(), num_rx_q, flags).map_err(Error::TapOpen)?; diff --git a/net_util/src/tap.rs b/net_util/src/tap.rs index bc841de119..6d90293b87 100644 --- a/net_util/src/tap.rs +++ b/net_util/src/tap.rs @@ -481,10 +481,33 @@ impl Tap { ifreq } - pub fn get_if_name(&self) -> &[u8] { + /// Returns the raw bytes of the interface name, which may or may not be + /// valid UTF-8. + pub fn if_name_as_bytes(&self) -> &[u8] { &self.if_name } + /// Returns the interface name as a string, truncated at the first NUL byte + /// if present. + /// + /// # Panic + /// Panics if the interface name is not encoded as valid UTF-8. This can + /// only be caused by unrecoverable internal errors as users and management + /// software are only allowed to specify interfaces names as Rust strings, + /// thus valid UTF-8. Also, self-generated interface names form CHV are + /// also always created from Rust strings, thus valid UTF-8. + pub fn if_name_as_str(&self) -> &str { + // All bytes until first NUL. + let nul_terminated = self + .if_name_as_bytes() + .split(|&b| b == 0) + .next() + .unwrap_or(&[]); + + // Panicking here is fine, see function documentation. + std::str::from_utf8(nul_terminated).expect("Tap interface name should be valid UTF-8") + } + #[cfg(fuzzing)] pub fn new_for_fuzzing(tap_file: File, if_name: Vec) -> Self { Tap { tap_file, if_name } From 6e002defe2c9baf1df08179c91db73eedaa49889 Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Sat, 23 Aug 2025 14:52:10 +0000 Subject: [PATCH 125/294] tests: remove redundant arch check in bzimage test test_direct_kernel_boot_bzimage runs only on x86, so the cfg!() branch for selecting grep_cmd is unnecessary. Remove it for clarity. 
Signed-off-by: Anirudh Rayabharam --- tests/integration.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/integration.rs b/tests/integration.rs index 13e612485b..4e74cf0fc8 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -3342,11 +3342,7 @@ mod common_parallel { assert_eq!(guest.get_cpu_count().unwrap_or_default(), 1); assert!(guest.get_total_memory().unwrap_or_default() > 480_000); - let grep_cmd = if cfg!(target_arch = "x86_64") { - "grep -c PCI-MSI /proc/interrupts" - } else { - "grep -c ITS-PCI-MSIX /proc/interrupts" - }; + let grep_cmd = "grep -c PCI-MSI /proc/interrupts"; assert_eq!( guest .ssh_command(grep_cmd) From 6e6cf099ef1124561a0004dec23a30e4bcd45781 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Mon, 25 Aug 2025 22:28:33 +0000 Subject: [PATCH 126/294] hypervisor: enable x2apic API in kvm on x86_64 Also tweak extended interrupt handlings, as needed. Most credit should go to Neel Natu, who figured out the magic bits needed to make things work and provided detailed comments. This patch is still NOOP, as VM config allows only up to 254 vCPUs on x86_64. Note: changes in this and related previous patches/PRs have only been tested on Linux hosts running on Intel x86_64 hardware. Signed-off-by: Barret Rhoden Signed-off-by: Neel Natu Signed-off-by: Ofir Weisse Signed-off-by: Peter Oskolkov --- hypervisor/src/kvm/mod.rs | 85 ++++++++++++++++++++++++++++++++++++--- hypervisor/src/vm.rs | 10 +++++ 2 files changed, 89 insertions(+), 6 deletions(-) diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index f92720779e..4204d4f83d 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -55,7 +55,8 @@ pub mod x86_64; #[cfg(target_arch = "x86_64")] use kvm_bindings::{ kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP, - KVM_GUESTDBG_USE_HW_BP, + KVM_CAP_X2APIC_API, KVM_GUESTDBG_USE_HW_BP, KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK, + KVM_X2APIC_API_USE_32BIT_IDS, }; #[cfg(target_arch = "x86_64")] use x86_64::check_required_kvm_extensions; @@ -492,6 +493,50 @@ impl KvmVm { pub fn check_extension(&self, c: Cap) -> bool { self.fd.check_extension(c) } + + #[cfg(target_arch = "x86_64")] + /// Translates the MSI extended destination ID bits according to the logic + /// found in the Linux kernel's KVM MSI handling in kvm_msi_to_lapic_irq()/x86_msi_msg_get_destid(): + /// https://github.com/torvalds/linux/blob/3957a5720157264dcc41415fbec7c51c4000fc2d/arch/x86/kvm/irq.c#L266 + /// https://github.com/torvalds/linux/blob/3957a5720157264dcc41415fbec7c51c4000fc2d/arch/x86/kernel/apic/apic.c#L2306 + /// + /// This function moves bits [11, 5] from `address_lo` to bits [46, 40] in the combined 64-bit + /// address, but only if the Remappable Format (RF) bit (bit 4) in `address_lo` is + /// not set and `address_hi` is zero. + /// + /// The function is roughly equivalent to `uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address)` in + /// qemu/target/i386/kvm/kvm.c: + /// https://github.com/qemu/qemu/blob/88f72048d2f5835a1b9eaba690c7861393aef283/target/i386/kvm/kvm.c#L6258 + fn translate_msi_ext_dest_id(mut address_lo: u32, mut address_hi: u32) -> (u32, u32) { + // Mask for extracting the RF (Remappable Format) bit from address_lo. + // In the MSI specification, this is bit 4. 
See + // VT-d spec section "Interrupt Requests in Remappable Format" + const REMAPPABLE_FORMAT_BIT_MASK: u32 = 0x10; + let remappable_format_bit_is_set = (address_lo & REMAPPABLE_FORMAT_BIT_MASK) != 0; + + // Only perform the bit swizzling if the RF bit is unset and the upper + // 32 bits of the address are all zero. This identifies the legacy format. + if address_hi == 0 && !remappable_format_bit_is_set { + // "Move" the bits [11,5] to bits [46,40]. This is a shift of 35 bits, but + // since address is already split up into lo and hi, it's only a shift of + // 3 (35 - 32) within hi. + // "Move" via getting the bits via mask, zeroing out that range, and then + // ORing them back in at the correct location. The destination was already + // checked to be all zeroes. + const EXT_ID_MASK: u32 = 0xfe0; + const EXT_ID_SHIFT: u32 = 3; + let ext_id = address_lo & EXT_ID_MASK; + address_lo &= !EXT_ID_MASK; + address_hi |= ext_id << EXT_ID_SHIFT; + } + + (address_lo, address_hi) + } + + #[cfg(not(target_arch = "x86_64"))] + fn translate_msi_ext_dest_id(address_lo: u32, address_hi: u32) -> (u32, u32) { + (address_lo, address_hi) + } } /// Implementation of Vm trait for KVM @@ -647,8 +692,12 @@ impl vm::Vm for KvmVm { ..Default::default() }; - kvm_route.u.msi.address_lo = cfg.low_addr; - kvm_route.u.msi.address_hi = cfg.high_addr; + let (address_lo, address_hi) = + Self::translate_msi_ext_dest_id(cfg.low_addr, cfg.high_addr); + + kvm_route.u.msi.address_lo = address_lo; + kvm_route.u.msi.address_hi = address_hi; + kvm_route.u.msi.data = cfg.data; if self.check_extension(crate::kvm::Cap::MsiDevid) { @@ -822,6 +871,28 @@ impl vm::Vm for KvmVm { Ok(()) } + #[cfg(target_arch = "x86_64")] + fn enable_x2apic_api(&self) -> vm::Result<()> { + // From https://docs.kernel.org/virt/kvm/api.html: + // On x86, kvm_msi::address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS feature of + // KVM_CAP_X2APIC_API capability is enabled. If it is enabled, address_hi bits 31-8 + // provide bits 31-8 of the destination id. Bits 7-0 of address_hi must be zero. + + // Thus KVM_X2APIC_API_USE_32BIT_IDS in combination with KVM_FEATURE_MSI_EXT_DEST_ID allows + // the guest to target interrupts to cpus with APIC IDs > 254. + + let mut cap = kvm_enable_cap { + cap: KVM_CAP_X2APIC_API, + ..Default::default() + }; + cap.args[0] = + (KVM_X2APIC_API_USE_32BIT_IDS | KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) as u64; + self.fd + .enable_cap(&cap) + .map_err(|e| vm::HypervisorVmError::EnableX2ApicApi(e.into()))?; + Ok(()) + } + #[cfg(target_arch = "x86_64")] fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> { let mut cap = kvm_enable_cap { @@ -1359,7 +1430,7 @@ impl cpu::Vcpu for KvmVcpu { let mut state = kvm_regs::default(); let mut off = offset_of!(user_pt_regs, regs); // There are 31 user_pt_regs: - // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72 + // https://elixir.bootlin.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72 // These actually are the general-purpose registers of the Armv8-a // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register). 
for i in 0..31 { @@ -1439,7 +1510,7 @@ impl cpu::Vcpu for KvmVcpu { } // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel: - // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53 + // https://elixir.bootlin.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53 let mut off = offset_of!(kvm_regs, fp_regs.vregs); for i in 0..32 { let mut bytes = [0_u8; 16]; @@ -2350,8 +2421,10 @@ impl cpu::Vcpu for KvmVcpu { .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; // Last mandatory thing to set -> the address pointing to the FDT (also called DTB). + // + // In an earlier version of https://www.kernel.org/doc/Documentation/arch/riscv/boot.rst: // "The device tree blob (dtb) must be placed on an 8-byte boundary and must - // not exceed 64 kilobytes in size." -> https://www.kernel.org/doc/Documentation/arch/riscv/boot.txt. + // not exceed 64 kilobytes in size." let a1 = offset_of!(kvm_riscv_core, regs.a1); self.fd .lock() diff --git a/hypervisor/src/vm.rs b/hypervisor/src/vm.rs index 25a80666ae..113c2001fe 100644 --- a/hypervisor/src/vm.rs +++ b/hypervisor/src/vm.rs @@ -126,6 +126,11 @@ pub enum HypervisorVmError { #[error("Failed to enable split Irq")] EnableSplitIrq(#[source] anyhow::Error), /// + /// Enable x2apic API error + /// + #[error("Failed to enable x2apic API")] + EnableX2ApicApi(#[source] anyhow::Error), + /// /// Enable SGX attribute error /// #[error("Failed to enable SGX attribute")] @@ -440,6 +445,11 @@ pub trait Vm: Send + Sync + Any { fn gain_page_access(&self, _gpa: u64, _size: u32) -> Result<()> { Ok(()) } + + #[cfg(all(feature = "kvm", target_arch = "x86_64"))] + fn enable_x2apic_api(&self) -> Result<()> { + unimplemented!("x2Apic is only supported on KVM/Linux hosts") + } } pub trait VmOps: Send + Sync { From 4a1eb0c4856832f12b4da29afa7a762608478304 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Aug 2025 13:56:36 +0000 Subject: [PATCH 127/294] build: Bump cc from 1.2.27 to 1.2.34 Bumps [cc](https://github.com/rust-lang/cc-rs) from 1.2.27 to 1.2.34. - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.27...cc-v1.2.34) --- updated-dependencies: - dependency-name: cc dependency-version: 1.2.34 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3ac9fdb4dc..af8984205d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -351,9 +351,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "cc" -version = "1.2.27" +version = "1.2.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" +checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" dependencies = [ "shlex", ] From 92370e8ff18b44f42cd155eec326f9e0a729a507 Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Mon, 25 Aug 2025 14:59:04 +0800 Subject: [PATCH 128/294] block: Using feature bits to check the read-only flag This patch changes the read-only check using acked features bit, which will help to check more features. 
Signed-off-by: Songqian Li --- block/src/lib.rs | 3 +++ virtio-devices/src/block.rs | 34 ++++++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/block/src/lib.rs b/block/src/lib.rs index 4e10771c4d..d988a13992 100644 --- a/block/src/lib.rs +++ b/block/src/lib.rs @@ -139,6 +139,8 @@ pub enum ExecuteError { Read(#[source] GuestMemoryError), #[error("Failed to read_exact")] ReadExact(#[source] io::Error), + #[error("Can't execute an operation other than `read` on a read-only device")] + ReadOnly, #[error("Failed to seek")] Seek(#[source] io::Error), #[error("Failed to write")] @@ -168,6 +170,7 @@ impl ExecuteError { ExecuteError::Flush(_) => VIRTIO_BLK_S_IOERR, ExecuteError::Read(_) => VIRTIO_BLK_S_IOERR, ExecuteError::ReadExact(_) => VIRTIO_BLK_S_IOERR, + ExecuteError::ReadOnly => VIRTIO_BLK_S_IOERR, ExecuteError::Seek(_) => VIRTIO_BLK_S_IOERR, ExecuteError::Write(_) => VIRTIO_BLK_S_IOERR, ExecuteError::WriteAll(_) => VIRTIO_BLK_S_IOERR, diff --git a/virtio-devices/src/block.rs b/virtio-devices/src/block.rs index ff28f3ba76..7e337ad5bf 100644 --- a/virtio-devices/src/block.rs +++ b/virtio-devices/src/block.rs @@ -20,7 +20,7 @@ use std::{io, result}; use anyhow::anyhow; use block::async_io::{AsyncIo, AsyncIoError, DiskFile}; use block::fcntl::{get_lock_state, LockError, LockType}; -use block::{build_serial, fcntl, Request, RequestType, VirtioBlockConfig}; +use block::{build_serial, fcntl, ExecuteError, Request, RequestType, VirtioBlockConfig}; use rate_limiter::group::{RateLimiterGroup, RateLimiterGroupHandle}; use rate_limiter::TokenType; use seccompiler::SeccompAction; @@ -144,11 +144,25 @@ struct BlockEpollHandler { inflight_requests: VecDeque<(u16, Request)>, rate_limiter: Option, access_platform: Option>, - read_only: bool, host_cpus: Option>, + acked_features: u64, +} + +fn has_feature(features: u64, feature_flag: u64) -> bool { + (features & (1u64 << feature_flag)) != 0 } impl BlockEpollHandler { + fn check_request(features: u64, request_type: RequestType) -> result::Result<(), ExecuteError> { + if has_feature(features, VIRTIO_BLK_F_RO.into()) && request_type != RequestType::In { + // For virtio spec compliance + // "A device MUST set the status byte to VIRTIO_BLK_S_IOERR for a write request + // if the VIRTIO_BLK_F_RO feature if offered, and MUST NOT write any data." + return Err(ExecuteError::ReadOnly); + } + Ok(()) + } + fn process_queue_submit(&mut self) -> Result<()> { let queue = &mut self.queue; @@ -159,10 +173,8 @@ impl BlockEpollHandler { // For virtio spec compliance // "A device MUST set the status byte to VIRTIO_BLK_S_IOERR for a write request // if the VIRTIO_BLK_F_RO feature if offered, and MUST NOT write any data." - if self.read_only - && (request.request_type == RequestType::Out - || request.request_type == RequestType::Flush) - { + if let Err(e) = Self::check_request(self.acked_features, request.request_type) { + warn!("Request check failed: {:x?} {:?}", request, e); desc_chain .memory() .write_obj(VIRTIO_BLK_S_IOERR, request.status_addr) @@ -583,7 +595,6 @@ pub struct Block { seccomp_action: SeccompAction, rate_limiter: Option>, exit_evt: EventFd, - read_only: bool, serial: Vec, queue_affinity: BTreeMap>, } @@ -715,15 +726,18 @@ impl Block { seccomp_action, rate_limiter, exit_evt, - read_only, serial, queue_affinity, }) } + fn read_only(&self) -> bool { + has_feature(self.features(), VIRTIO_BLK_F_RO.into()) + } + /// Tries to set an advisory lock for the corresponding disk image. 
pub fn try_lock_image(&mut self) -> Result<()> { - let lock_type = match self.read_only { + let lock_type = match self.read_only() { true => LockType::Read, false => LockType::Write, }; @@ -904,8 +918,8 @@ impl VirtioDevice for Block { .transpose() .unwrap(), access_platform: self.common.access_platform.clone(), - read_only: self.read_only, host_cpus: self.queue_affinity.get(&queue_idx).cloned(), + acked_features: self.common.acked_features, }; let paused = self.common.paused.clone(); From bec975a7fc45f488f6c58143ea915735d1bc8cb0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Aug 2025 17:21:55 +0000 Subject: [PATCH 129/294] build: Bump wait-timeout from 0.2.0 to 0.2.1 Bumps [wait-timeout](https://github.com/alexcrichton/wait-timeout) from 0.2.0 to 0.2.1. - [Commits](https://github.com/alexcrichton/wait-timeout/compare/0.2.0...0.2.1) --- updated-dependencies: - dependency-name: wait-timeout dependency-version: 0.2.1 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index af8984205d..23bf435375 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2514,9 +2514,9 @@ dependencies = [ [[package]] name = "wait-timeout" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" dependencies = [ "libc", ] diff --git a/Cargo.toml b/Cargo.toml index 21e9cf1348..25a142bb2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -150,5 +150,5 @@ log = "0.4.22" signal-hook = "0.3.18" thiserror = "2.0.12" uuid = { version = "1.17.0" } -wait-timeout = "0.2.0" +wait-timeout = "0.2.1" zerocopy = { version = "0.8.26", default-features = false } From 5357761c37080386cf02c19acdddb829ba1473f2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Aug 2025 19:46:32 +0000 Subject: [PATCH 130/294] build: Bump async-io from 2.4.1 to 2.5.0 Bumps [async-io](https://github.com/smol-rs/async-io) from 2.4.1 to 2.5.0. - [Release notes](https://github.com/smol-rs/async-io/releases) - [Changelog](https://github.com/smol-rs/async-io/blob/master/CHANGELOG.md) - [Commits](https://github.com/smol-rs/async-io/compare/v2.4.1...v2.5.0) --- updated-dependencies: - dependency-name: async-io dependency-version: 2.5.0 dependency-type: indirect update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 23bf435375..a0ccdabcec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -161,9 +161,9 @@ dependencies = [ [[package]] name = "async-io" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1237c0ae75a0f3765f58910ff9cdd0a12eeb39ab2f4c7de23262f337f0aacbb3" +checksum = "19634d6336019ef220f09fd31168ce5c184b295cbf80345437cc36094ef223ca" dependencies = [ "async-lock", "cfg-if", @@ -174,8 +174,7 @@ dependencies = [ "polling", "rustix 1.0.7", "slab", - "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] From 9c85fbb1afefad55ebd3dfa37c8e09261a969b39 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Aug 2025 07:06:48 +0000 Subject: [PATCH 131/294] build: Bump proc-macro2 from 1.0.95 to 1.0.101 Bumps [proc-macro2](https://github.com/dtolnay/proc-macro2) from 1.0.95 to 1.0.101. - [Release notes](https://github.com/dtolnay/proc-macro2/releases) - [Commits](https://github.com/dtolnay/proc-macro2/compare/1.0.95...1.0.101) --- updated-dependencies: - dependency-name: proc-macro2 dependency-version: 1.0.101 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a0ccdabcec..5ca831622a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1678,9 +1678,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] From 41930d11f7a6a5c098a09795857e2598fbfb9093 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 28 Aug 2025 23:02:04 +0000 Subject: [PATCH 132/294] build: Bump syn from 2.0.104 to 2.0.106 Bumps [syn](https://github.com/dtolnay/syn) from 2.0.104 to 2.0.106. - [Release notes](https://github.com/dtolnay/syn/releases) - [Commits](https://github.com/dtolnay/syn/compare/2.0.104...2.0.106) --- updated-dependencies: - dependency-name: syn dependency-version: 2.0.106 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5ca831622a..0f88e6fdcd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2021,9 +2021,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.104" +version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", From 8b2af1a2c841812a22be2d5eaffc37262f998a9d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 28 Aug 2025 23:20:08 +0000 Subject: [PATCH 133/294] build: Bump crate-ci/typos from 1.35.5 to 1.35.6 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.35.5 to 1.35.6. 
- [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.35.5...v1.35.6) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-version: 1.35.6 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/quality.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 317fa84150..a37eefa240 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -167,4 +167,4 @@ jobs: steps: - uses: actions/checkout@v5 # Executes "typos ." - - uses: crate-ci/typos@v1.35.5 + - uses: crate-ci/typos@v1.35.6 From 67ab81874a5d35203580c9088719a91aa1d8c2bf Mon Sep 17 00:00:00 2001 From: Muminul Islam Date: Wed, 13 Aug 2025 00:21:50 +0000 Subject: [PATCH 134/294] block: virtio-devices: block: Clarify the return of execute_async() Instead of returning boolean return an struct of completion status so that it can be cached for batch submission. Signed-off-by: Bo Chen Signed-off-by: Muminul Islam --- block/src/lib.rs | 15 ++++++++++++--- virtio-devices/src/block.rs | 10 ++++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/block/src/lib.rs b/block/src/lib.rs index d988a13992..560010e266 100644 --- a/block/src/lib.rs +++ b/block/src/lib.rs @@ -232,6 +232,11 @@ pub struct AlignedOperation { layout: Layout, } +pub struct ExecuteAsync { + // `true` if the execution will complete asynchronously + pub async_complete: bool, +} + #[derive(Debug)] pub struct Request { pub request_type: RequestType, @@ -397,7 +402,7 @@ impl Request { disk_image: &mut dyn AsyncIo, serial: &[u8], user_data: u64, - ) -> result::Result { + ) -> result::Result { let sector = self.sector; let request_type = self.request_type; let offset = (sector << SECTOR_SHIFT) as libc::off_t; @@ -473,6 +478,9 @@ impl Request { iovecs.push(iovec); } + let mut ret = ExecuteAsync { + async_complete: true, + }; // Queue operations expected to be submitted. match request_type { RequestType::In => { @@ -507,12 +515,13 @@ impl Request { } mem.write_slice(serial, data_addr) .map_err(ExecuteError::Write)?; - return Ok(false); + ret.async_complete = false; + return Ok(ret); } RequestType::Unsupported(t) => return Err(ExecuteError::Unsupported(t)), } - Ok(true) + Ok(ret) } pub fn complete_async(&mut self) -> result::Result<(), Error> { diff --git a/virtio-devices/src/block.rs b/virtio-devices/src/block.rs index 7e337ad5bf..f301294b4f 100644 --- a/virtio-devices/src/block.rs +++ b/virtio-devices/src/block.rs @@ -20,7 +20,9 @@ use std::{io, result}; use anyhow::anyhow; use block::async_io::{AsyncIo, AsyncIoError, DiskFile}; use block::fcntl::{get_lock_state, LockError, LockType}; -use block::{build_serial, fcntl, ExecuteError, Request, RequestType, VirtioBlockConfig}; +use block::{ + build_serial, fcntl, ExecuteAsync, ExecuteError, Request, RequestType, VirtioBlockConfig, +}; use rate_limiter::group::{RateLimiterGroup, RateLimiterGroupHandle}; use rate_limiter::TokenType; use seccompiler::SeccompAction; @@ -232,7 +234,11 @@ impl BlockEpollHandler { desc_chain.head_index() as u64, ); - if let Ok(true) = result { + if let Ok(ExecuteAsync { + async_complete: true, + .. 
+ }) = result + { self.inflight_requests .push_back((desc_chain.head_index(), request)); } else { From 245bce23fa4d2ce7890ec3c51693bc097b8963ed Mon Sep 17 00:00:00 2001 From: Muminul Islam Date: Wed, 13 Aug 2025 00:43:36 +0000 Subject: [PATCH 135/294] block, virtio-devices: Support request submission in batch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cache and batch IO requests after parsing all items in the queue, improving performance—especially for small block sizes—by reducing per-request overhead. Introduced two methods in the AsyncIo trait for batch submission, with implementation in the raw disk backend. This method should be called during/after parsing all block IO requests in the available queue. If the batch submission is not enabled, by default it does the old way of submitting requests. Signed-off-by: Bo Chen Signed-off-by: Muminul Islam --- block/src/async_io.rs | 8 ++++++- block/src/lib.rs | 40 ++++++++++++++++++++++++++++------ virtio-devices/src/block.rs | 43 ++++++++++++++++++++++++++++++++++--- 3 files changed, 81 insertions(+), 10 deletions(-) diff --git a/block/src/async_io.rs b/block/src/async_io.rs index 3f37bd6e34..2a4891d140 100644 --- a/block/src/async_io.rs +++ b/block/src/async_io.rs @@ -8,7 +8,7 @@ use std::os::fd::{AsRawFd, OwnedFd, RawFd}; use thiserror::Error; use vmm_sys_util::eventfd::EventFd; -use crate::DiskTopology; +use crate::{BatchRequest, DiskTopology}; #[derive(Error, Debug)] pub enum DiskFileError { @@ -99,4 +99,10 @@ pub trait AsyncIo: Send { ) -> AsyncIoResult<()>; fn fsync(&mut self, user_data: Option) -> AsyncIoResult<()>; fn next_completed_request(&mut self) -> Option<(u64, i32)>; + fn batch_requests_enabled(&self) -> bool { + false + } + fn submit_batch_requests(&mut self, _batch_request: &[BatchRequest]) -> AsyncIoResult<()> { + Ok(()) + } } diff --git a/block/src/lib.rs b/block/src/lib.rs index 560010e266..3cca348b3b 100644 --- a/block/src/lib.rs +++ b/block/src/lib.rs @@ -232,9 +232,18 @@ pub struct AlignedOperation { layout: Layout, } +pub struct BatchRequest { + pub offset: libc::off_t, + pub iovecs: SmallVec<[libc::iovec; DEFAULT_DESCRIPTOR_VEC_SIZE]>, + pub user_data: u64, + pub request_type: RequestType, +} + pub struct ExecuteAsync { // `true` if the execution will complete asynchronously pub async_complete: bool, + // request need to be batched for submission if any + pub batch_request: Option, } #[derive(Debug)] @@ -480,6 +489,7 @@ impl Request { let mut ret = ExecuteAsync { async_complete: true, + batch_request: None, }; // Queue operations expected to be submitted. 
match request_type { @@ -490,14 +500,32 @@ impl Request { .bitmap() .mark_dirty(0, *data_len as usize); } - disk_image - .read_vectored(offset, &iovecs, user_data) - .map_err(ExecuteError::AsyncRead)?; + if disk_image.batch_requests_enabled() { + ret.batch_request = Some(BatchRequest { + offset, + iovecs, + user_data, + request_type, + }); + } else { + disk_image + .read_vectored(offset, &iovecs, user_data) + .map_err(ExecuteError::AsyncRead)?; + } } RequestType::Out => { - disk_image - .write_vectored(offset, &iovecs, user_data) - .map_err(ExecuteError::AsyncWrite)?; + if disk_image.batch_requests_enabled() { + ret.batch_request = Some(BatchRequest { + offset, + iovecs, + user_data, + request_type, + }); + } else { + disk_image + .write_vectored(offset, &iovecs, user_data) + .map_err(ExecuteError::AsyncWrite)?; + } } RequestType::Flush => { disk_image diff --git a/virtio-devices/src/block.rs b/virtio-devices/src/block.rs index f301294b4f..415ae3f365 100644 --- a/virtio-devices/src/block.rs +++ b/virtio-devices/src/block.rs @@ -167,6 +167,8 @@ impl BlockEpollHandler { fn process_queue_submit(&mut self) -> Result<()> { let queue = &mut self.queue; + let mut batch_requests = Vec::new(); + let mut batch_inflight_requests = Vec::new(); while let Some(mut desc_chain) = queue.pop_descriptor_chain(self.mem.memory()) { let mut request = Request::parse(&mut desc_chain, self.access_platform.as_ref()) @@ -236,11 +238,21 @@ impl BlockEpollHandler { if let Ok(ExecuteAsync { async_complete: true, - .. + batch_request, }) = result { - self.inflight_requests - .push_back((desc_chain.head_index(), request)); + if let Some(batch_request) = batch_request { + match batch_request.request_type { + RequestType::In | RequestType::Out => batch_requests.push(batch_request), + _ => { + unreachable!( + "Unexpected batch request type: {:?}", + request.request_type + ) + } + } + } + batch_inflight_requests.push((desc_chain.head_index(), request)); } else { let status = match result { Ok(_) => VIRTIO_BLK_S_OK, @@ -266,6 +278,31 @@ impl BlockEpollHandler { } } + match self.disk_image.submit_batch_requests(&batch_requests) { + Ok(()) => { + self.inflight_requests.extend(batch_inflight_requests); + } + Err(e) => { + // If batch submission fails, report VIRTIO_BLK_S_IOERR for all requests. + for (user_data, request) in batch_inflight_requests { + warn!( + "Request failed with batch submission: {:x?} {:?}", + request, e + ); + let desc_index = user_data; + let mem = self.mem.memory(); + mem.write_obj(VIRTIO_BLK_S_IOERR as u8, request.status_addr) + .map_err(Error::RequestStatus)?; + queue + .add_used(mem.deref(), desc_index, 0) + .map_err(Error::QueueAddUsed)?; + queue + .enable_notification(mem.deref()) + .map_err(Error::QueueEnableNotification)?; + } + } + } + Ok(()) } From c4bab33020557e644713c1cc6a8e29de78bcb4ea Mon Sep 17 00:00:00 2001 From: Muminul Islam Date: Wed, 13 Aug 2025 01:17:00 +0000 Subject: [PATCH 136/294] block: Enable request submission in batch when using io_uring Implement the batch submission function for raw disk, default it is enabled. After parsing the requests this method is called for better IO latency and bandwidth. 
Signed-off-by: Bo Chen Signed-off-by: Muminul Islam --- block/src/async_io.rs | 3 ++ block/src/raw_async.rs | 75 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/block/src/async_io.rs b/block/src/async_io.rs index 2a4891d140..aa31c54367 100644 --- a/block/src/async_io.rs +++ b/block/src/async_io.rs @@ -79,6 +79,9 @@ pub enum AsyncIoError { /// Failed synchronizing file. #[error("Failed synchronizing file")] Fsync(#[source] std::io::Error), + /// Failed submitting batch requests. + #[error("Failed submitting batch requests: {0}")] + SubmitBatchRequests(#[source] std::io::Error), } pub type AsyncIoResult = std::result::Result; diff --git a/block/src/raw_async.rs b/block/src/raw_async.rs index 496445c6ad..b3c9882fbb 100644 --- a/block/src/raw_async.rs +++ b/block/src/raw_async.rs @@ -12,7 +12,7 @@ use vmm_sys_util::eventfd::EventFd; use crate::async_io::{ AsyncIo, AsyncIoError, AsyncIoResult, BorrowedDiskFd, DiskFile, DiskFileError, DiskFileResult, }; -use crate::DiskTopology; +use crate::{BatchRequest, DiskTopology, RequestType}; pub struct RawFileDisk { file: File, @@ -168,4 +168,77 @@ impl AsyncIo for RawFileAsync { .next() .map(|entry| (entry.user_data(), entry.result())) } + + fn batch_requests_enabled(&self) -> bool { + true + } + + fn submit_batch_requests(&mut self, batch_request: &[BatchRequest]) -> AsyncIoResult<()> { + if !self.batch_requests_enabled() { + return Ok(()); + } + + let (submitter, mut sq, _) = self.io_uring.split(); + let mut submitted = false; + + for req in batch_request { + match req.request_type { + RequestType::In => { + // SAFETY: we know the file descriptor is valid and we + // relied on vm-memory to provide the buffer address. + unsafe { + sq.push( + &opcode::Readv::new( + types::Fd(self.fd), + req.iovecs.as_ptr(), + req.iovecs.len() as u32, + ) + .offset(req.offset as u64) + .build() + .user_data(req.user_data), + ) + .map_err(|_| { + AsyncIoError::ReadVectored(Error::other("Submission queue is full")) + })? + }; + submitted = true; + } + RequestType::Out => { + // SAFETY: we know the file descriptor is valid and we + // relied on vm-memory to provide the buffer address. + unsafe { + sq.push( + &opcode::Writev::new( + types::Fd(self.fd), + req.iovecs.as_ptr(), + req.iovecs.len() as u32, + ) + .offset(req.offset as u64) + .build() + .user_data(req.user_data), + ) + .map_err(|_| { + AsyncIoError::WriteVectored(Error::other("Submission queue is full")) + })? + }; + submitted = true; + } + _ => { + unreachable!("Unexpected batch request type: {:?}", req.request_type) + } + } + } + + // Only submit if we actually queued something + if submitted { + // Update the submission queue and submit new operations to the + // io_uring instance. + sq.sync(); + submitter + .submit() + .map_err(AsyncIoError::SubmitBatchRequests)?; + } + + Ok(()) + } } From a9d680752278670a2db134b56c6cc8c2ef8d8c24 Mon Sep 17 00:00:00 2001 From: Muminul Islam Date: Fri, 8 Aug 2025 16:02:48 -0700 Subject: [PATCH 137/294] block: batch submit requests for fixed VHD Updated VHD async implementation to call the batch submit method via the raw async IO layer. 
Signed-off-by: Muminul Islam --- block/src/fixed_vhd_async.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/block/src/fixed_vhd_async.rs b/block/src/fixed_vhd_async.rs index 6b51d070f8..ac02e21bf3 100644 --- a/block/src/fixed_vhd_async.rs +++ b/block/src/fixed_vhd_async.rs @@ -12,7 +12,7 @@ use crate::async_io::{ }; use crate::fixed_vhd::FixedVhd; use crate::raw_async::RawFileAsync; -use crate::BlockBackend; +use crate::{BatchRequest, BlockBackend}; pub struct FixedVhdDiskAsync(FixedVhd); @@ -106,4 +106,12 @@ impl AsyncIo for FixedVhdAsync { fn next_completed_request(&mut self) -> Option<(u64, i32)> { self.raw_file_async.next_completed_request() } + + fn batch_requests_enabled(&self) -> bool { + true + } + + fn submit_batch_requests(&mut self, batch_request: &[BatchRequest]) -> AsyncIoResult<()> { + self.raw_file_async.submit_batch_requests(batch_request) + } } From c3a809696a1fe97308785a562fb62b1c10e9b45c Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 26 Aug 2025 08:07:04 +0200 Subject: [PATCH 138/294] docs: add Safety section to unsafe functions This step was done manually by searching for "unsafe fn" in the code base and adding corresponding Safety sections. `clippy::missing_safety_doc` only works for public functions but none of the corresponding functions is public. Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- net_util/src/tap.rs | 9 +++++++++ vmm/src/clone3.rs | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/net_util/src/tap.rs b/net_util/src/tap.rs index 6d90293b87..533d6a45b2 100644 --- a/net_util/src/tap.rs +++ b/net_util/src/tap.rs @@ -129,6 +129,9 @@ fn ipv6_mask_to_prefix(mask: Ipv6Addr) -> Result { } impl Tap { + /// # Safety + /// The caller should ensure to pass a valid file descriptor and valid + /// arguments for the `ioctl()` syscall. unsafe fn ioctl_with_mut_ref(fd: &F, req: c_ulong, arg: &mut T) -> Result<()> { let ret = ioctl_with_mut_ref(fd, req, arg); if ret < 0 { @@ -138,6 +141,9 @@ impl Tap { Ok(()) } + /// # Safety + /// The caller should ensure to pass a valid file descriptor and valid + /// arguments for the `ioctl()` syscall. unsafe fn ioctl_with_ref(fd: &F, req: c_ulong, arg: &T) -> Result<()> { let ret = ioctl_with_ref(fd, req, arg); if ret < 0 { @@ -147,6 +153,9 @@ impl Tap { Ok(()) } + /// # Safety + /// The caller should ensure to pass a valid file descriptor and valid + /// arguments for the `ioctl()` syscall. unsafe fn ioctl_with_val(fd: &F, req: c_ulong, arg: c_ulong) -> Result<()> { let ret = ioctl_with_val(fd, req, arg); if ret < 0 { diff --git a/vmm/src/clone3.rs b/vmm/src/clone3.rs index f08e5ad31c..0ab08126e1 100644 --- a/vmm/src/clone3.rs +++ b/vmm/src/clone3.rs @@ -22,6 +22,16 @@ pub struct clone_args { pub cgroup: u64, } +/// # Safety +/// `size` must have the proper size to match `args`. +/// Further, the caller needs to check the return value. +/// +/// # Return +/// - On success: +/// - Parent: child PID (`c_long`) +/// - Child: `0` +/// - On error: `-1` and `errno` is set +#[must_use] pub unsafe fn clone3(args: &mut clone_args, size: size_t) -> c_long { syscall(SYS_clone3, args, size) } From f68880600ca9c96525e367a55409e1889256e304 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 29 Aug 2025 23:19:16 +0000 Subject: [PATCH 139/294] build: Bump crate-ci/typos from 1.35.6 to 1.35.7 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.35.6 to 1.35.7. 
- [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.35.6...v1.35.7) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-version: 1.35.7 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/quality.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index a37eefa240..08922bdaf2 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -167,4 +167,4 @@ jobs: steps: - uses: actions/checkout@v5 # Executes "typos ." - - uses: crate-ci/typos@v1.35.6 + - uses: crate-ci/typos@v1.35.7 From 456f9b3871f4f2e0b79b50022a678be4c2171268 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 29 Aug 2025 23:01:37 +0000 Subject: [PATCH 140/294] build: Bump bitflags from 2.9.2 to 2.9.3 Bumps [bitflags](https://github.com/bitflags/bitflags) from 2.9.2 to 2.9.3. - [Release notes](https://github.com/bitflags/bitflags/releases) - [Changelog](https://github.com/bitflags/bitflags/blob/main/CHANGELOG.md) - [Commits](https://github.com/bitflags/bitflags/compare/2.9.2...2.9.3) --- updated-dependencies: - dependency-name: bitflags dependency-version: 2.9.3 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 28 ++++++++++++++-------------- Cargo.toml | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0f88e6fdcd..e4efc129cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -298,9 +298,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.2" +version = "2.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a65b545ab31d687cff52899d4890855fec459eb6afe0da6417b8a18da87aa29" +checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" [[package]] name = "block" @@ -526,7 +526,7 @@ dependencies = [ "anyhow", "arch", "bitfield-struct", - "bitflags 2.9.2", + "bitflags 2.9.3", "byteorder", "event_monitor", "hypervisor", @@ -639,7 +639,7 @@ version = "4.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74351c3392ea1ff6cd2628e0042d268ac2371cb613252ff383b6dfa50d22fa79" dependencies = [ - "bitflags 2.9.2", + "bitflags 2.9.3", "libc", ] @@ -828,7 +828,7 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d66e32caf5dd59f561be0143e413e01d651bd8498eb9aa0be8c482c81c8d31" dependencies = [ - "bitflags 2.9.2", + "bitflags 2.9.3", "cfg-if", "log", "managed", @@ -1076,7 +1076,7 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b702df98508cb63ad89dd9beb9f6409761b30edca10d48e57941d3f11513a006" dependencies = [ - "bitflags 2.9.2", + "bitflags 2.9.3", "kvm-bindings", "libc", "vmm-sys-util", @@ -1111,7 +1111,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.9.2", + "bitflags 2.9.3", "libc", ] @@ -1292,7 +1292,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags 2.9.2", + "bitflags 2.9.3", "cfg-if", "cfg_aliases", "libc", @@ -1833,7 +1833,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.9.2", + "bitflags 2.9.3", "errno", "libc", "linux-raw-sys 0.4.15", @@ -1846,7 +1846,7 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags 2.9.2", + "bitflags 2.9.3", "errno", "libc", "linux-raw-sys 0.9.4", @@ -2258,7 +2258,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed81c5ed8224d468a322e923777ed0615cad433fe61177126098af995f89cecf" dependencies = [ - "bitflags 2.9.2", + "bitflags 2.9.3", "libc", "log", "serde", @@ -2276,7 +2276,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a4dcad85a129d97d5d4b2f3c47a4affdeedd76bdcd02094bcb5d9b76cac2d05" dependencies = [ - "bitflags 2.9.2", + "bitflags 2.9.3", "libc", "uuid", "vm-memory", @@ -2449,7 +2449,7 @@ dependencies = [ "acpi_tables", "anyhow", "arch", - "bitflags 2.9.2", + "bitflags 2.9.3", "block", "blocking", "cfg-if", @@ -2851,7 +2851,7 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.9.2", + "bitflags 2.9.3", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 25a142bb2d..8f49a0da80 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -136,7 +136,7 @@ serde_with = { version = "3.14.0", default-features = false } # other crates anyhow = "1.0.98" -bitflags = "2.9.2" +bitflags = "2.9.3" byteorder = "1.5.0" cfg-if = "1.0.0" clap = "4.5.13" From c38596d6d3fee8dabacbffac3eb742eb5780bc70 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Sep 2025 22:43:03 +0000 Subject: [PATCH 141/294] build: Bump serde_json from 1.0.120 to 1.0.143 Bumps [serde_json](https://github.com/serde-rs/json) from 1.0.120 to 1.0.143. - [Release notes](https://github.com/serde-rs/json/releases) - [Commits](https://github.com/serde-rs/json/compare/v1.0.120...v1.0.143) --- updated-dependencies: - dependency-name: serde_json dependency-version: 1.0.143 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 5 +++-- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e4efc129cf..ef5cb9ddcd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1902,11 +1902,12 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.120" +version = "1.0.143" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] diff --git a/Cargo.toml b/Cargo.toml index 8f49a0da80..767d8ff6c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -131,7 +131,7 @@ igvm_defs = { git = "https://github.com/microsoft/igvm", branch = "main" } # serde crates serde = "1.0.208" -serde_json = "1.0.120" +serde_json = "1.0.143" serde_with = { version = "3.14.0", default-features = false } # other crates From 1ca6c159ef4cca6ffa94f24daa75e7971e8dbd16 Mon Sep 17 00:00:00 2001 From: Muminul Islam Date: Tue, 2 Sep 2025 15:43:03 -0700 Subject: [PATCH 142/294] tests: option to override default migratable version This patch gives user an option to override the default migratable version to any later release. This option makes MSHV specific tests suitable for tests since MSHV is stable after some breaking changes. This patch is also necessary for MSHV CI. Signed-off-by: Muminul Islam --- scripts/dev_cli.sh | 1 + scripts/run_integration_tests_live_migration.sh | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/scripts/dev_cli.sh b/scripts/dev_cli.sh index 257889ae5b..6dea0d7d22 100755 --- a/scripts/dev_cli.sh +++ b/scripts/dev_cli.sh @@ -538,6 +538,7 @@ cmd_tests() { --env TARGET_CC="$target_cc" \ --env AUTH_DOWNLOAD_TOKEN="$AUTH_DOWNLOAD_TOKEN" \ --env LLVM_PROFILE_FILE="$LLVM_PROFILE_FILE" \ + --env MIGRATABLE_VERSION="$MIGRATABLE_VERSION" \ "$CTR_IMAGE" \ ./scripts/run_integration_tests_live_migration.sh "$@" || fix_dir_perms $? || exit $? fi diff --git a/scripts/run_integration_tests_live_migration.sh b/scripts/run_integration_tests_live_migration.sh index 0df9c01c91..fa0b3dcf45 100755 --- a/scripts/run_integration_tests_live_migration.sh +++ b/scripts/run_integration_tests_live_migration.sh @@ -11,6 +11,7 @@ mkdir -p "$WORKLOADS_DIR" process_common_args "$@" +migratable_version=v39.0 # For now these values are default for kvm test_features="" @@ -18,6 +19,15 @@ if [ "$hypervisor" = "mshv" ]; then test_features="--features mshv" fi +# if migratable version is set to override the default +if [ -n "${MIGRATABLE_VERSION}" ]; then + # validate the version if matched with vxx.0 + if ! [[ "${MIGRATABLE_VERSION}" =~ ^v[0-9]{2,}\.[0-9]$ ]]; then + echo "MIGRATABLE_VERSION should be in format vxx.0, e.g. 
v47.0" + exit 1 + fi + migratable_version=${MIGRATABLE_VERSION} +fi cp scripts/sha1sums-x86_64 "$WORKLOADS_DIR" FOCAL_OS_IMAGE_NAME="focal-server-cloudimg-amd64-custom-20210609-0.qcow2" @@ -45,8 +55,7 @@ fi popd || exit # Download Cloud Hypervisor binary from its last stable release -LAST_RELEASE_VERSION="v39.0" -CH_RELEASE_URL="https://github.com/cloud-hypervisor/cloud-hypervisor/releases/download/$LAST_RELEASE_VERSION/cloud-hypervisor-static" +CH_RELEASE_URL="https://github.com/cloud-hypervisor/cloud-hypervisor/releases/download/${migratable_version}/cloud-hypervisor-static" CH_RELEASE_NAME="cloud-hypervisor-static" pushd "$WORKLOADS_DIR" || exit time wget --quiet $CH_RELEASE_URL -O "$CH_RELEASE_NAME" || exit 1 From 92325fc073a7cac44cd431fcbd58a693fbe64e52 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Sep 2025 17:58:02 +0000 Subject: [PATCH 143/294] build: Bump zvariant from 5.6.0 to 5.7.0 Bumps [zvariant](https://github.com/dbus2/zbus) from 5.6.0 to 5.7.0. - [Release notes](https://github.com/dbus2/zbus/releases) - [Commits](https://github.com/dbus2/zbus/compare/zvariant-5.6.0...zvariant-5.7.0) --- updated-dependencies: - dependency-name: zvariant dependency-version: 5.7.0 dependency-type: indirect update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ef5cb9ddcd..06680355e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2958,9 +2958,9 @@ dependencies = [ [[package]] name = "zvariant" -version = "5.6.0" +version = "5.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d91b3680bb339216abd84714172b5138a4edac677e641ef17e1d8cb1b3ca6e6f" +checksum = "999dd3be73c52b1fccd109a4a81e4fcd20fab1d3599c8121b38d04e1419498db" dependencies = [ "endi", "enumflags2", @@ -2972,9 +2972,9 @@ dependencies = [ [[package]] name = "zvariant_derive" -version = "5.6.0" +version = "5.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a8c68501be459a8dbfffbe5d792acdd23b4959940fc87785fb013b32edbc208" +checksum = "6643fd0b26a46d226bd90d3f07c1b5321fe9bb7f04673cb37ac6d6883885b68e" dependencies = [ "proc-macro-crate", "proc-macro2", From bd2219004353aa1d45440abce3a8adcc617036bb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Sep 2025 07:21:02 +0000 Subject: [PATCH 144/294] build: Bump uuid from 1.17.0 to 1.18.1 Bumps [uuid](https://github.com/uuid-rs/uuid) from 1.17.0 to 1.18.1. - [Release notes](https://github.com/uuid-rs/uuid/releases) - [Commits](https://github.com/uuid-rs/uuid/compare/v1.17.0...v1.18.1) --- updated-dependencies: - dependency-name: uuid dependency-version: 1.18.1 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 06680355e8..0f9793b81b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2209,9 +2209,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.17.0" +version = "1.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" +checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" dependencies = [ "getrandom 0.3.3", "js-sys", diff --git a/Cargo.toml b/Cargo.toml index 767d8ff6c3..74cdbbeb17 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -149,6 +149,6 @@ libc = "0.2.167" log = "0.4.22" signal-hook = "0.3.18" thiserror = "2.0.12" -uuid = { version = "1.17.0" } +uuid = { version = "1.18.1" } wait-timeout = "0.2.1" zerocopy = { version = "0.8.26", default-features = false } From f7e72456b968592b27224ddbbe58b7a9c2aed9c5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Sep 2025 10:18:23 +0000 Subject: [PATCH 145/294] build: Bump anstyle-parse from 0.2.6 to 0.2.7 Bumps [anstyle-parse](https://github.com/rust-cli/anstyle) from 0.2.6 to 0.2.7. - [Commits](https://github.com/rust-cli/anstyle/compare/anstyle-parse-v0.2.6...anstyle-parse-v0.2.7) --- updated-dependencies: - dependency-name: anstyle-parse dependency-version: 0.2.7 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0f9793b81b..3e6a50bce8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,9 +57,9 @@ checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "anstyle-parse" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" dependencies = [ "utf8parse", ] From 676fb93c1a1ad16773b6f43d01e55ae89b836f79 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Sep 2025 10:29:24 +0000 Subject: [PATCH 146/294] build: Bump crate-ci/typos from 1.35.7 to 1.36.1 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.35.7 to 1.36.1. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.35.7...v1.36.1) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-version: 1.36.1 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/quality.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 08922bdaf2..24ee35990c 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -167,4 +167,4 @@ jobs: steps: - uses: actions/checkout@v5 # Executes "typos ." 
- - uses: crate-ci/typos@v1.35.7 + - uses: crate-ci/typos@v1.36.1 From 1cc193ea3ca0438d44b2b6a1593770917a9819be Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Sep 2025 12:24:08 +0000 Subject: [PATCH 147/294] build: Bump anstyle from 1.0.8 to 1.0.11 Bumps [anstyle](https://github.com/rust-cli/anstyle) from 1.0.8 to 1.0.11. - [Commits](https://github.com/rust-cli/anstyle/compare/v1.0.8...v1.0.11) --- updated-dependencies: - dependency-name: anstyle dependency-version: 1.0.11 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3e6a50bce8..440c50b5ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -51,9 +51,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.8" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" [[package]] name = "anstyle-parse" From f7f79642ab79999c514e8cd36bd9315855ea6d78 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Sep 2025 14:54:21 +0000 Subject: [PATCH 148/294] build: Bump io-uring from 0.6.4 to 0.7.10 Bumps [io-uring](https://github.com/tokio-rs/io-uring) from 0.6.4 to 0.7.10. - [Commits](https://github.com/tokio-rs/io-uring/commits) --- updated-dependencies: - dependency-name: io-uring dependency-version: 0.7.10 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 7 ++++--- block/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 440c50b5ee..cffc146c9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -996,11 +996,12 @@ dependencies = [ [[package]] name = "io-uring" -version = "0.6.4" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595a0399f411a508feb2ec1e970a4a30c249351e30208960d58298de8660b0e5" +checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.9.3", + "cfg-if", "libc", ] diff --git a/block/Cargo.toml b/block/Cargo.toml index aac824a004..9e0505921e 100644 --- a/block/Cargo.toml +++ b/block/Cargo.toml @@ -11,7 +11,7 @@ io_uring = ["dep:io-uring"] [dependencies] byteorder = { workspace = true } crc-any = "2.5.0" -io-uring = { version = "0.6.4", optional = true } +io-uring = { version = "0.7.10", optional = true } libc = { workspace = true } log = { workspace = true } remain = "0.2.15" From 9bf4696b09f815a1de8594c3b042d54c7f76d642 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Sep 2025 23:01:56 +0000 Subject: [PATCH 149/294] build: Bump libssh2-sys from 0.3.0 to 0.3.1 Bumps [libssh2-sys](https://github.com/alexcrichton/ssh2-rs) from 0.3.0 to 0.3.1. - [Commits](https://github.com/alexcrichton/ssh2-rs/compare/libssh2-sys-0.3.0...libssh2-sys-0.3.1) --- updated-dependencies: - dependency-name: libssh2-sys dependency-version: 0.3.1 dependency-type: indirect update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cffc146c9a..f985a18180 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1118,9 +1118,9 @@ dependencies = [ [[package]] name = "libssh2-sys" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dc8a030b787e2119a731f1951d6a773e2280c660f8ec4b0f5e1505a386e71ee" +checksum = "220e4f05ad4a218192533b300327f5150e809b54c4ec83b5a1d91833601811b9" dependencies = [ "cc", "libc", From 7281459bf946260b9401d405d2e752e0aa1a1c01 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Sep 2025 23:17:01 +0000 Subject: [PATCH 150/294] build: Bump actions/setup-python from 5 to 6 Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5 to 6. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v5...v6) --- updated-dependencies: - dependency-name: actions/setup-python dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/dco.yaml | 2 +- .github/workflows/gitlint.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dco.yaml b/.github/workflows/dco.yaml index 4c83547872..daf21315e0 100644 --- a/.github/workflows/dco.yaml +++ b/.github/workflows/dco.yaml @@ -8,7 +8,7 @@ jobs: steps: - uses: actions/checkout@v5 - name: Set up Python 3.x - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.x' - name: Check DCO diff --git a/.github/workflows/gitlint.yaml b/.github/workflows/gitlint.yaml index c31fee202d..7c3c4f7e45 100644 --- a/.github/workflows/gitlint.yaml +++ b/.github/workflows/gitlint.yaml @@ -13,7 +13,7 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 - name: Set up Python 3.10 - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: "3.10" - name: Install dependencies From 2d9e2431638dfdbc341625a1ab67f3814dbd77cf Mon Sep 17 00:00:00 2001 From: Shubham Chakrawar Date: Fri, 15 Aug 2025 13:18:14 -0700 Subject: [PATCH 151/294] misc: Remove SGX support from Cloud Hypervisor This commit removes the SGX support from cloud hypervisor. SGX support was deprecated in May as part of #7090. 
Signed-off-by: Shubham Chakrawar --- .lychee.toml | 2 - arch/src/lib.rs | 5 - arch/src/x86_64/mod.rs | 154 +---------------- docs/intel_sgx.md | 54 ------ docs/memory.md | 23 +-- docs/snapshot_restore.md | 2 +- fuzz/fuzz_targets/http_api.rs | 2 - hypervisor/src/kvm/mod.rs | 26 +-- hypervisor/src/mshv/mod.rs | 7 - hypervisor/src/vm.rs | 10 -- scripts/dev_cli.sh | 26 --- scripts/run_integration_tests_sgx.sh | 51 ------ src/main.rs | 10 -- test_infra/src/lib.rs | 18 -- tests/integration.rs | 44 ----- vmm/src/acpi.rs | 10 -- vmm/src/api/openapi/cloud-hypervisor.yaml | 23 --- vmm/src/config.rs | 85 +--------- vmm/src/cpu.rs | 11 -- vmm/src/lib.rs | 8 +- vmm/src/memory_manager.rs | 197 +--------------------- vmm/src/vm.rs | 33 ---- vmm/src/vm_config.rs | 15 -- 23 files changed, 11 insertions(+), 805 deletions(-) delete mode 100644 docs/intel_sgx.md delete mode 100755 scripts/run_integration_tests_sgx.sh diff --git a/.lychee.toml b/.lychee.toml index 875a861826..44517a7819 100644 --- a/.lychee.toml +++ b/.lychee.toml @@ -2,8 +2,6 @@ verbose = "info" exclude = [ # Availability of links below should be manually verified. - # Page for intel SGX support, returns 403 while querying. - '^https://www.intel.com/content/www/us/en/developer/tools/software-guard-extensions/linux-overview.html', # Page for intel TDX support, returns 403 while querying. '^https://www.intel.com/content/www/us/en/developer/tools/trust-domain-extensions/overview.html', # Page for TPM, returns 403 while querying. diff --git a/arch/src/lib.rs b/arch/src/lib.rs index aff58ffe31..2413fe2235 100644 --- a/arch/src/lib.rs +++ b/arch/src/lib.rs @@ -18,9 +18,6 @@ use std::{fmt, result}; use serde::{Deserialize, Serialize}; use thiserror::Error; -#[cfg(target_arch = "x86_64")] -use crate::x86_64::SgxEpcSection; - type GuestMemoryMmap = vm_memory::GuestMemoryMmap; type GuestRegionMmap = vm_memory::GuestRegionMmap; @@ -127,8 +124,6 @@ pub struct NumaNode { pub pci_segments: Vec, pub distances: BTreeMap, pub memory_zones: Vec, - #[cfg(target_arch = "x86_64")] - pub sgx_epc_sections: Vec, } pub type NumaNodes = BTreeMap; diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index 27e1375d8a..22d1a1cfea 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -12,7 +12,6 @@ pub mod layout; mod mpspec; mod mptable; pub mod regs; -use std::collections::BTreeMap; use std::mem; use hypervisor::arch::x86::{CpuIdEntry, CPUID_FLAG_VALID_INDEX}; @@ -24,7 +23,7 @@ use linux_loader::loader::elf::start_info::{ use thiserror::Error; use vm_memory::{ Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic, - GuestMemoryRegion, GuestUsize, + GuestMemoryRegion, }; use crate::{GuestMemoryMmap, InitramfsConfig, RegionType}; @@ -79,55 +78,7 @@ pub struct EntryPoint { const E820_RAM: u32 = 1; const E820_RESERVED: u32 = 2; -#[derive(Clone)] -pub struct SgxEpcSection { - start: GuestAddress, - size: GuestUsize, -} - -impl SgxEpcSection { - pub fn new(start: GuestAddress, size: GuestUsize) -> Self { - SgxEpcSection { start, size } - } - pub fn start(&self) -> GuestAddress { - self.start - } - pub fn size(&self) -> GuestUsize { - self.size - } -} - -#[derive(Clone)] -pub struct SgxEpcRegion { - start: GuestAddress, - size: GuestUsize, - epc_sections: BTreeMap, -} - -impl SgxEpcRegion { - pub fn new(start: GuestAddress, size: GuestUsize) -> Self { - SgxEpcRegion { - start, - size, - epc_sections: BTreeMap::new(), - } - } - pub fn start(&self) -> GuestAddress { - self.start - } - pub fn size(&self) -> GuestUsize { - self.size 
- } - pub fn epc_sections(&self) -> &BTreeMap { - &self.epc_sections - } - pub fn insert(&mut self, id: String, epc_section: SgxEpcSection) { - self.epc_sections.insert(id, epc_section); - } -} - pub struct CpuidConfig { - pub sgx_epc_sections: Option>, pub phys_bits: u8, pub kvm_hyperv: bool, #[cfg(feature = "tdx")] @@ -169,18 +120,6 @@ pub enum Error { #[error("Error setting up SMBIOS table")] SmbiosSetup(#[source] smbios::Error), - /// Could not find any SGX EPC section - #[error("Could not find any SGX EPC section")] - NoSgxEpcSection, - - /// Missing SGX CPU feature - #[error("Missing SGX CPU feature")] - MissingSgxFeature, - - /// Missing SGX_LC CPU feature - #[error("Missing SGX_LC CPU feature")] - MissingSgxLaunchControlFeature, - /// Error getting supported CPUID through the hypervisor (kvm/mshv) API #[error("Error getting supported CPUID through the hypervisor API")] CpuidGetSupported(#[source] HypervisorError), @@ -467,7 +406,7 @@ impl CpuidFeatureEntry { feature_reg: CpuidReg::EDX, compatible_check: CpuidCompatibleCheck::BitwiseSubset, }, - // KVM CPUID bits: https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html + // KVM CPUID bits: https://www.kernel.org/doc/html/latest/virt/kvm/x86/cpuid.html // Leaf 0x4000_0000, EAX/EBX/ECX/EDX, KVM CPUID SIGNATURE CpuidFeatureEntry { function: 0x4000_0000, @@ -675,10 +614,6 @@ pub fn generate_common_cpuid( CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches); - if let Some(sgx_epc_sections) = &config.sgx_epc_sections { - update_cpuid_sgx(&mut cpuid, sgx_epc_sections)?; - } - #[cfg(feature = "tdx")] let tdx_capabilities = if config.tdx { let caps = hypervisor @@ -974,7 +909,6 @@ pub fn configure_system( _num_cpus: u32, setup_header: Option, rsdp_addr: Option, - sgx_epc_region: Option, serial_number: Option<&str>, uuid: Option<&str>, oem_strings: Option<&[&str]>, @@ -1008,15 +942,8 @@ pub fn configure_system( initramfs, hdr, rsdp_addr, - sgx_epc_region, - ), - None => configure_pvh( - guest_mem, - cmdline_addr, - initramfs, - rsdp_addr, - sgx_epc_region, ), + None => configure_pvh(guest_mem, cmdline_addr, initramfs, rsdp_addr), } } @@ -1108,7 +1035,6 @@ fn configure_pvh( cmdline_addr: GuestAddress, initramfs: &Option, rsdp_addr: Option, - sgx_epc_region: Option, ) -> super::Result<()> { const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336ec578; @@ -1174,15 +1100,6 @@ fn configure_pvh( E820_RESERVED, ); - if let Some(sgx_epc_region) = sgx_epc_region { - add_memmap_entry( - &mut memmap, - sgx_epc_region.start().raw_value(), - sgx_epc_region.size(), - E820_RESERVED, - ); - } - start_info.memmap_entries = memmap.len() as u32; // Copy the vector with the memmap table to the MEMMAP_START address @@ -1229,7 +1146,6 @@ fn configure_32bit_entry( initramfs: &Option, setup_hdr: setup_header, rsdp_addr: Option, - sgx_epc_region: Option, ) -> super::Result<()> { const KERNEL_LOADER_OTHER: u8 = 0xff; @@ -1285,15 +1201,6 @@ fn configure_32bit_entry( E820_RESERVED, )?; - if let Some(sgx_epc_region) = sgx_epc_region { - add_e820_entry( - &mut params, - sgx_epc_region.start().raw_value(), - sgx_epc_region.size(), - E820_RESERVED, - )?; - } - if let Some(rsdp_addr) = rsdp_addr { params.acpi_rsdp_addr = rsdp_addr.0; } @@ -1527,57 +1434,6 @@ fn update_cpuid_topology( } } } - -// The goal is to update the CPUID sub-leaves to reflect the number of EPC -// sections exposed to the guest. -fn update_cpuid_sgx( - cpuid: &mut Vec, - epc_sections: &[SgxEpcSection], -) -> Result<(), Error> { - // Something's wrong if there's no EPC section. 
- if epc_sections.is_empty() { - return Err(Error::NoSgxEpcSection); - } - // We can't go further if the hypervisor does not support SGX feature. - if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::EBX, 2) { - return Err(Error::MissingSgxFeature); - } - // We can't go further if the hypervisor does not support SGX_LC feature. - if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::ECX, 30) { - return Err(Error::MissingSgxLaunchControlFeature); - } - - // Get host CPUID for leaf 0x12, subleaf 0x2. This is to retrieve EPC - // properties such as confidentiality and integrity. - // SAFETY: call cpuid with valid leaves - let leaf = unsafe { std::arch::x86_64::__cpuid_count(0x12, 0x2) }; - - for (i, epc_section) in epc_sections.iter().enumerate() { - let subleaf_idx = i + 2; - let start = epc_section.start().raw_value(); - let size = epc_section.size(); - let eax = (start & 0xffff_f000) as u32 | 0x1; - let ebx = (start >> 32) as u32; - let ecx = (size & 0xffff_f000) as u32 | (leaf.ecx & 0xf); - let edx = (size >> 32) as u32; - // CPU Topology leaf 0x12 - CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, eax); - CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, ebx); - CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, ecx); - CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, edx); - } - - // Add one NULL entry to terminate the dynamic list - let subleaf_idx = epc_sections.len() + 2; - // CPU Topology leaf 0x12 - CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, 0); - CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, 0); - CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, 0); - CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, 0); - - Ok(()) -} - #[cfg(test)] mod tests { use linux_loader::loader::bootparam::boot_e820_entry; @@ -1608,7 +1464,6 @@ mod tests { None, None, None, - None, ); config_err.unwrap_err(); @@ -1633,7 +1488,6 @@ mod tests { None, None, None, - None, ) .unwrap(); @@ -1663,7 +1517,6 @@ mod tests { None, None, None, - None, ) .unwrap(); @@ -1679,7 +1532,6 @@ mod tests { None, None, None, - None, ) .unwrap(); } diff --git a/docs/intel_sgx.md b/docs/intel_sgx.md deleted file mode 100644 index 9f2ca76bdc..0000000000 --- a/docs/intel_sgx.md +++ /dev/null @@ -1,54 +0,0 @@ -# Intel SGX - -Intel® Software Guard Extensions (Intel® SGX) is an Intel technology designed -to increase the security of application code and data. Cloud Hypervisor supports -SGX virtualization through KVM. Because SGX is built on hardware features that -cannot be emulated in software, virtualizing SGX requires support in KVM and in -the host kernel. The required Linux and KVM changes can be found in Linux 5.13+. - -Utilizing SGX in the guest requires a kernel/OS with SGX support, e.g. a kernel -since release 5.11, see -[here](https://www.intel.com/content/www/us/en/developer/tools/software-guard-extensions/linux-overview.html). -Running Linux 5.13+ as the guest kernel allows nested virtualization of SGX. - -For more information about SGX, please refer to the [SGX Homepage](https://www.intel.com/content/www/us/en/developer/tools/software-guard-extensions/linux-overview.html). - -For more information about SGX SDK and how to test SGX, please refer to the -following [instructions](https://github.com/intel/linux-sgx). 
- -## Cloud Hypervisor support - -Assuming the host exposes `/dev/sgx_vepc`, we can pass SGX enclaves through -the guest. - -In order to use SGX enclaves within a Cloud Hypervisor VM, we must define one -or several Enclave Page Cache (EPC) sections. Here is an example of a VM being -created with 2 EPC sections, the first one being 64MiB with pre-allocated -memory, the second one being 32MiB with no pre-allocated memory. - -```bash -./cloud-hypervisor \ - --cpus boot=1 \ - --memory size=1G \ - --disk path=focal-server-cloudimg-amd64.raw \ - --kernel vmlinux \ - --cmdline "console=ttyS0 console=hvc0 root=/dev/vda1 rw" \ - --sgx-epc id=epc0,size=64M,prefault=on id=epc1,size=32M,prefault=off -``` - -Once booted, and assuming your guest kernel contains the patches from the -[KVM SGX Tree](https://github.com/intel/kvm-sgx), you can validate SGX devices -have been correctly created under `/dev/sgx`: - -```bash -ls /dev/sgx* -/dev/sgx_enclave /dev/sgx_provision /dev/sgx_vepc -``` - -From this point, it is possible to run any SGX application from the guest, as -it will access `/dev/sgx_enclave` device to create dedicated SGX enclaves. - -Note: There is only one contiguous SGX EPC region, which contains all SGX EPC -sections. This region is exposed through ACPI and marked as reserved through -the e820 table. It is treated as yet another device, which means it should -appear at the end of the guest address space. diff --git a/docs/memory.md b/docs/memory.md index 46569449c8..a429ff1b78 100644 --- a/docs/memory.md +++ b/docs/memory.md @@ -437,12 +437,11 @@ struct NumaConfig { cpus: Option>, distances: Option>, memory_zones: Option>, - sgx_epc_sections: Option>, } ``` ``` ---numa Settings related to a given NUMA node "guest_numa_id=,cpus=,distances=,memory_zones=,sgx_epc_sections=" +--numa Settings related to a given NUMA node "guest_numa_id=,cpus=,distances=,memory_zones= ``` ### `guest_numa_id` @@ -550,26 +549,6 @@ _Example_ --numa guest_numa_id=0,memory_zones=[mem0,mem2] guest_numa_id=1,memory_zones=mem1 ``` -### `sgx_epc_sections` - -List of SGX EPC sections attached to the guest NUMA node identified by the -`guest_numa_id` option. This allows for describing a list of SGX EPC sections -which must be seen by the guest as belonging to the NUMA node `guest_numa_id`. - -Multiple values can be provided to define the list. Each value is a string -referring to an existing SGX EPC section identifier. Values are separated from -each other with the `,` separator. - -As soon as one tries to describe a list of values, `[` and `]` must be used to -demarcate the list. - -_Example_ - -``` ---sgx-epc id=epc0,size=32M id=epc1,size=64M id=epc2,size=32M ---numa guest_numa_id=0,sgx_epc_sections=epc1 guest_numa_id=1,sgx_epc_sections=[epc0,epc2] -``` - ### PCI bus Cloud Hypervisor supports guests with one or more PCI segments. The default PCI segment always diff --git a/docs/snapshot_restore.md b/docs/snapshot_restore.md index 67f29ce6dc..df7248805e 100644 --- a/docs/snapshot_restore.md +++ b/docs/snapshot_restore.md @@ -110,4 +110,4 @@ from the restored VM. ## Limitations -VFIO devices and Intel SGX are out of scope. +VFIO devices is out of scope. 
diff --git a/fuzz/fuzz_targets/http_api.rs b/fuzz/fuzz_targets/http_api.rs index ee8fa52376..e9965ceddf 100644 --- a/fuzz/fuzz_targets/http_api.rs +++ b/fuzz/fuzz_targets/http_api.rs @@ -186,8 +186,6 @@ impl RequestHandler for StubApiRequestHandler { #[cfg(feature = "pvmemcontrol")] pvmemcontrol: None, iommu: false, - #[cfg(target_arch = "x86_64")] - sgx_epc: None, numa: None, watchdog: false, gdb: false, diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 4204d4f83d..9aaafd5b07 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -12,11 +12,9 @@ use std::any::Any; use std::collections::HashMap; -#[cfg(target_arch = "x86_64")] -use std::fs::File; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use std::mem::offset_of; -#[cfg(target_arch = "x86_64")] +#[cfg(feature = "tdx")] use std::os::unix::io::AsRawFd; #[cfg(feature = "tdx")] use std::os::unix::io::RawFd; @@ -108,6 +106,8 @@ use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO}; pub use kvm_ioctls::{Cap, Kvm, VcpuExit}; use thiserror::Error; use vfio_ioctls::VfioDeviceFd; +#[cfg(target_arch = "x86_64")] +use vmm_sys_util::ioctl_io_nr; #[cfg(feature = "tdx")] use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_iowr_nr}; pub use {kvm_bindings, kvm_ioctls}; @@ -116,13 +116,6 @@ pub use {kvm_bindings, kvm_ioctls}; use crate::arch::aarch64::regs; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use crate::RegList; - -#[cfg(target_arch = "x86_64")] -const KVM_CAP_SGX_ATTRIBUTE: u32 = 196; - -#[cfg(target_arch = "x86_64")] -use vmm_sys_util::ioctl_io_nr; - #[cfg(target_arch = "x86_64")] ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a); @@ -893,19 +886,6 @@ impl vm::Vm for KvmVm { Ok(()) } - #[cfg(target_arch = "x86_64")] - fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> { - let mut cap = kvm_enable_cap { - cap: KVM_CAP_SGX_ATTRIBUTE, - ..Default::default() - }; - cap.args[0] = file.as_raw_fd() as u64; - self.fd - .enable_cap(&cap) - .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?; - Ok(()) - } - /// Retrieve guest clock. 
#[cfg(target_arch = "x86_64")] fn get_clock(&self) -> vm::Result { diff --git a/hypervisor/src/mshv/mod.rs b/hypervisor/src/mshv/mod.rs index 8b331cf0a8..93634ada0f 100644 --- a/hypervisor/src/mshv/mod.rs +++ b/hypervisor/src/mshv/mod.rs @@ -41,8 +41,6 @@ pub mod x86_64; // aarch64 dependencies #[cfg(target_arch = "aarch64")] pub mod aarch64; -#[cfg(target_arch = "x86_64")] -use std::fs::File; use std::os::unix::io::AsRawFd; #[cfg(target_arch = "aarch64")] use std::sync::Mutex; @@ -1892,11 +1890,6 @@ impl vm::Vm for MshvVm { Ok(()) } - #[cfg(target_arch = "x86_64")] - fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> { - Ok(()) - } - fn register_ioevent( &self, fd: &EventFd, diff --git a/hypervisor/src/vm.rs b/hypervisor/src/vm.rs index 113c2001fe..bd9c0e6746 100644 --- a/hypervisor/src/vm.rs +++ b/hypervisor/src/vm.rs @@ -11,8 +11,6 @@ // use std::any::Any; -#[cfg(target_arch = "x86_64")] -use std::fs::File; use std::sync::Arc; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use std::sync::Mutex; @@ -130,12 +128,6 @@ pub enum HypervisorVmError { /// #[error("Failed to enable x2apic API")] EnableX2ApicApi(#[source] anyhow::Error), - /// - /// Enable SGX attribute error - /// - #[error("Failed to enable SGX attribute")] - EnableSgxAttribute(#[source] anyhow::Error), - /// /// Get clock error /// #[error("Failed to get clock")] @@ -363,8 +355,6 @@ pub trait Vm: Send + Sync + Any { /// Enable split Irq capability #[cfg(target_arch = "x86_64")] fn enable_split_irq(&self) -> Result<()>; - #[cfg(target_arch = "x86_64")] - fn enable_sgx_attribute(&self, file: File) -> Result<()>; /// Retrieve guest clock. #[cfg(target_arch = "x86_64")] fn get_clock(&self) -> Result; diff --git a/scripts/dev_cli.sh b/scripts/dev_cli.sh index 6dea0d7d22..519517dcdb 100755 --- a/scripts/dev_cli.sh +++ b/scripts/dev_cli.sh @@ -193,7 +193,6 @@ cmd_help() { echo " Run the Cloud Hypervisor tests." echo " --unit Run the unit tests." echo " --integration Run the integration tests." - echo " --integration-sgx Run the SGX integration tests." echo " --integration-vfio Run the VFIO integration tests." echo " --integration-windows Run the Windows guest integration tests." echo " --integration-live-migration Run the live-migration integration tests." @@ -327,7 +326,6 @@ cmd_clean() { cmd_tests() { unit=false integration=false - integration_sgx=false integration_vfio=false integration_windows=false integration_live_migration=false @@ -346,7 +344,6 @@ cmd_tests() { } ;; "--unit") { unit=true; } ;; "--integration") { integration=true; } ;; - "--integration-sgx") { integration_sgx=true; } ;; "--integration-vfio") { integration_vfio=true; } ;; "--integration-windows") { integration_windows=true; } ;; "--integration-live-migration") { integration_live_migration=true; } ;; @@ -449,29 +446,6 @@ cmd_tests() { dbus-run-session ./scripts/run_integration_tests_"$(uname -m)".sh "$@" || fix_dir_perms $? || exit $? fi - if [ "$integration_sgx" = true ]; then - say "Running SGX integration tests for $target..." 
- $DOCKER_RUNTIME run \ - --workdir "$CTR_CLH_ROOT_DIR" \ - --rm \ - --privileged \ - --security-opt seccomp=unconfined \ - --ipc=host \ - --net="$CTR_CLH_NET" \ - --mount type=tmpfs,destination=/tmp \ - --volume /dev:/dev \ - --volume "$CLH_ROOT_DIR:$CTR_CLH_ROOT_DIR" \ - ${exported_volumes:+"$exported_volumes"} \ - --volume "$CLH_INTEGRATION_WORKLOADS:$CTR_CLH_INTEGRATION_WORKLOADS" \ - --env USER="root" \ - --env BUILD_TARGET="$target" \ - --env RUSTFLAGS="$rustflags" \ - --env TARGET_CC="$target_cc" \ - --env AUTH_DOWNLOAD_TOKEN="$AUTH_DOWNLOAD_TOKEN" \ - "$CTR_IMAGE" \ - ./scripts/run_integration_tests_sgx.sh "$@" || fix_dir_perms $? || exit $? - fi - if [ "$integration_vfio" = true ]; then say "Running VFIO integration tests for $target..." $DOCKER_RUNTIME run \ diff --git a/scripts/run_integration_tests_sgx.sh b/scripts/run_integration_tests_sgx.sh deleted file mode 100755 index b6549b6288..0000000000 --- a/scripts/run_integration_tests_sgx.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env bash -# shellcheck disable=SC2048,SC2086 -set -x - -# shellcheck source=/dev/null -source "$HOME"/.cargo/env -source "$(dirname "$0")"/test-util.sh - -process_common_args "$@" - -if [[ "$hypervisor" = "mshv" ]]; then - echo "Unsupported SGX test for MSHV" - exit 1 -fi - -WORKLOADS_DIR="$HOME/workloads" -mkdir -p "$WORKLOADS_DIR" - -download_hypervisor_fw - -JAMMY_OS_IMAGE_NAME="jammy-server-cloudimg-amd64-custom-20241017-0.qcow2" -JAMMY_OS_IMAGE_URL="https://ch-images.azureedge.net/$JAMMY_OS_IMAGE_NAME" -JAMMY_OS_IMAGE="$WORKLOADS_DIR/$JAMMY_OS_IMAGE_NAME" -if [ ! -f "$JAMMY_OS_IMAGE" ]; then - pushd "$WORKLOADS_DIR" || exit - time wget --quiet $JAMMY_OS_IMAGE_URL || exit 1 - popd || exit -fi - -JAMMY_OS_RAW_IMAGE_NAME="jammy-server-cloudimg-amd64-custom-20241017-0.raw" -JAMMY_OS_RAW_IMAGE="$WORKLOADS_DIR/$JAMMY_OS_RAW_IMAGE_NAME" -if [ ! -f "$JAMMY_OS_RAW_IMAGE" ]; then - pushd "$WORKLOADS_DIR" || exit - time qemu-img convert -p -f qcow2 -O raw $JAMMY_OS_IMAGE_NAME $JAMMY_OS_RAW_IMAGE_NAME || exit 1 - popd || exit -fi - -CFLAGS="" -if [[ "${BUILD_TARGET}" == "x86_64-unknown-linux-musl" ]]; then - # shellcheck disable=SC2034 - CFLAGS="-I /usr/include/x86_64-linux-musl/ -idirafter /usr/include/" -fi - -cargo build --features mshv --all --release --target "$BUILD_TARGET" - -export RUST_BACKTRACE=1 - -time cargo test "sgx::$test_filter" -- ${test_binary_args[*]} -RES=$? - -exit $RES diff --git a/src/main.rs b/src/main.rs index 6daac338f7..8329100b6f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -31,8 +31,6 @@ use vmm::vm_config; use vmm::vm_config::FwCfgConfig; #[cfg(feature = "ivshmem")] use vmm::vm_config::IvshmemConfig; -#[cfg(target_arch = "x86_64")] -use vmm::vm_config::SgxEpcConfig; use vmm::vm_config::{ BalloonConfig, DeviceConfig, DiskConfig, FsConfig, LandlockConfig, NetConfig, NumaConfig, PciSegmentConfig, PmemConfig, RateLimiterGroupConfig, TpmConfig, UserDeviceConfig, VdpaConfig, @@ -429,12 +427,6 @@ fn get_cli_options_sorted( .help("Control serial port: off|null|pty|tty|file=|socket=") .default_value("null") .group("vm-config"), - #[cfg(target_arch = "x86_64")] - Arg::new("sgx-epc") - .long("sgx-epc") - .help(SgxEpcConfig::SYNTAX) - .num_args(1..) 
- .group("vm-config"), Arg::new("tpm") .long("tpm") .num_args(1) @@ -1030,8 +1022,6 @@ mod unit_tests { #[cfg(feature = "pvmemcontrol")] pvmemcontrol: None, iommu: false, - #[cfg(target_arch = "x86_64")] - sgx_epc: None, numa: None, watchdog: false, #[cfg(feature = "guest_debug")] diff --git a/test_infra/src/lib.rs b/test_infra/src/lib.rs index 6875aa5b22..812a3a9339 100644 --- a/test_infra/src/lib.rs +++ b/test_infra/src/lib.rs @@ -1061,24 +1061,6 @@ impl Guest { } } - #[cfg(target_arch = "x86_64")] - pub fn check_sgx_support(&self) -> Result<(), Error> { - self.ssh_command( - "cpuid -l 0x7 -s 0 | tr -s [:space:] | grep -q 'SGX: \ - Software Guard Extensions supported = true'", - )?; - self.ssh_command( - "cpuid -l 0x7 -s 0 | tr -s [:space:] | grep -q 'SGX_LC: \ - SGX launch config supported = true'", - )?; - self.ssh_command( - "cpuid -l 0x12 -s 0 | tr -s [:space:] | grep -q 'SGX1 \ - supported = true'", - )?; - - Ok(()) - } - pub fn get_pci_bridge_class(&self) -> Result { Ok(self .ssh_command("cat /sys/bus/pci/devices/0000:00:00.0/class")? diff --git a/tests/integration.rs b/tests/integration.rs index 4e74cf0fc8..94f79dac6d 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -9530,50 +9530,6 @@ mod windows { } } -#[cfg(target_arch = "x86_64")] -mod sgx { - use crate::*; - - #[test] - fn test_sgx() { - let jammy_image = JAMMY_IMAGE_NAME.to_string(); - let jammy = UbuntuDiskConfig::new(jammy_image); - let guest = Guest::new(Box::new(jammy)); - - let mut child = GuestCommand::new(&guest) - .args(["--cpus", "boot=1"]) - .args(["--memory", "size=512M"]) - .args(["--kernel", fw_path(FwType::RustHypervisorFirmware).as_str()]) - .default_disks() - .default_net() - .args(["--sgx-epc", "id=epc0,size=64M"]) - .capture_output() - .spawn() - .unwrap(); - - let r = std::panic::catch_unwind(|| { - guest.wait_vm_boot(None).unwrap(); - - // Check if SGX is correctly detected in the guest. - guest.check_sgx_support().unwrap(); - - // Validate the SGX EPC section is 64MiB. 
- assert_eq!( - guest - .ssh_command("cpuid -l 0x12 -s 2 | grep 'section size' | cut -d '=' -f 2") - .unwrap() - .trim(), - "0x0000000004000000" - ); - }); - - let _ = child.kill(); - let output = child.wait_with_output().unwrap(); - - handle_child_output(r, &output); - } -} - #[cfg(target_arch = "x86_64")] mod vfio { use crate::*; diff --git a/vmm/src/acpi.rs b/vmm/src/acpi.rs index a2299acd84..215deac912 100644 --- a/vmm/src/acpi.rs +++ b/vmm/src/acpi.rs @@ -314,16 +314,6 @@ fn create_srat_table( )) } - #[cfg(target_arch = "x86_64")] - for section in &node.sgx_epc_sections { - srat.append(MemoryAffinity::from_range( - section.start().raw_value(), - section.size(), - proximity_domain, - MemAffinityFlags::ENABLE, - )) - } - for cpu in &node.cpus { #[cfg(target_arch = "x86_64")] let x2apic_id = arch::x86_64::get_x2apic_id(*cpu, topology); diff --git a/vmm/src/api/openapi/cloud-hypervisor.yaml b/vmm/src/api/openapi/cloud-hypervisor.yaml index 80a4fa2572..e4a76f6b74 100644 --- a/vmm/src/api/openapi/cloud-hypervisor.yaml +++ b/vmm/src/api/openapi/cloud-hypervisor.yaml @@ -607,10 +607,6 @@ components: $ref: "#/components/schemas/VdpaConfig" vsock: $ref: "#/components/schemas/VsockConfig" - sgx_epc: - type: array - items: - $ref: "#/components/schemas/SgxEpcConfig" numa: type: array items: @@ -1143,21 +1139,6 @@ components: id: type: string - SgxEpcConfig: - required: - - id - - size - type: object - properties: - id: - type: string - size: - type: integer - format: int64 - prefault: - type: boolean - default: false - NumaDistance: required: - destination @@ -1192,10 +1173,6 @@ components: type: array items: type: string - sgx_epc_sections: - type: array - items: - type: string pci_segments: type: array items: diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 06523761ad..29a2644b74 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -109,14 +109,6 @@ pub enum Error { /// Failed parsing restore parameters #[error("Error parsing --restore")] ParseRestore(#[source] OptionParserError), - /// Failed parsing SGX EPC parameters - #[cfg(target_arch = "x86_64")] - #[error("Error parsing --sgx-epc")] - ParseSgxEpc(#[source] OptionParserError), - /// Missing 'id' from SGX EPC section - #[cfg(target_arch = "x86_64")] - #[error("Error parsing --sgx-epc: id missing")] - ParseSgxEpcIdMissing, /// Failed parsing NUMA parameters #[error("Error parsing --numa")] ParseNuma(#[source] OptionParserError), @@ -395,8 +387,6 @@ pub struct VmParams<'a> { #[cfg(feature = "pvmemcontrol")] pub pvmemcontrol: bool, pub pvpanic: bool, - #[cfg(target_arch = "x86_64")] - pub sgx_epc: Option>, pub numa: Option>, pub watchdog: bool, #[cfg(feature = "guest_debug")] @@ -462,10 +452,6 @@ impl<'a> VmParams<'a> { #[cfg(feature = "pvmemcontrol")] let pvmemcontrol = args.get_flag("pvmemcontrol"); let pvpanic = args.get_flag("pvpanic"); - #[cfg(target_arch = "x86_64")] - let sgx_epc: Option> = args - .get_many::("sgx-epc") - .map(|x| x.map(|y| y as &str).collect()); let numa: Option> = args .get_many::("numa") .map(|x| x.map(|y| y as &str).collect()); @@ -516,8 +502,6 @@ impl<'a> VmParams<'a> { #[cfg(feature = "pvmemcontrol")] pvmemcontrol, pvpanic, - #[cfg(target_arch = "x86_64")] - sgx_epc, numa, watchdog, #[cfg(feature = "guest_debug")] @@ -2139,36 +2123,10 @@ impl VsockConfig { } } -#[cfg(target_arch = "x86_64")] -impl SgxEpcConfig { - pub const SYNTAX: &'static str = "SGX EPC parameters \ - \"id=,size=,prefault=on|off\""; - - pub fn parse(sgx_epc: &str) -> Result { - let mut parser = OptionParser::new(); - 
parser.add("id").add("size").add("prefault"); - parser.parse(sgx_epc).map_err(Error::ParseSgxEpc)?; - - let id = parser.get("id").ok_or(Error::ParseSgxEpcIdMissing)?; - let size = parser - .convert::("size") - .map_err(Error::ParseSgxEpc)? - .unwrap_or(ByteSized(0)) - .0; - let prefault = parser - .convert::("prefault") - .map_err(Error::ParseSgxEpc)? - .unwrap_or(Toggle(false)) - .0; - - Ok(SgxEpcConfig { id, size, prefault }) - } -} - impl NumaConfig { pub const SYNTAX: &'static str = "Settings related to a given NUMA node \ \"guest_numa_id=,cpus=,distances=,\ - memory_zones=,sgx_epc_sections=,\ + memory_zones=,\ pci_segments=\""; pub fn parse(numa: &str) -> Result { @@ -2178,7 +2136,6 @@ impl NumaConfig { .add("cpus") .add("distances") .add("memory_zones") - .add("sgx_epc_sections") .add("pci_segments"); parser.parse(numa).map_err(Error::ParseNuma)?; @@ -2206,11 +2163,6 @@ impl NumaConfig { .convert::("memory_zones") .map_err(Error::ParseNuma)? .map(|v| v.0); - #[cfg(target_arch = "x86_64")] - let sgx_epc_sections = parser - .convert::("sgx_epc_sections") - .map_err(Error::ParseNuma)? - .map(|v| v.0); let pci_segments = parser .convert::("pci_segments") .map_err(Error::ParseNuma)? @@ -2220,8 +2172,6 @@ impl NumaConfig { cpus, distances, memory_zones, - #[cfg(target_arch = "x86_64")] - sgx_epc_sections, pci_segments, }) } @@ -2800,14 +2750,6 @@ impl VmConfig { } } - #[cfg(target_arch = "x86_64")] - if let Some(sgx_epcs) = &self.sgx_epc { - for sgx_epc in sgx_epcs.iter() { - let id = sgx_epc.id.clone(); - Self::validate_identifier(&mut id_list, &Some(id))?; - } - } - if let Some(pci_segments) = &self.pci_segments { for pci_segment in pci_segments { pci_segment.validate(self)?; @@ -2957,21 +2899,6 @@ impl VmConfig { let platform = vm_params.platform.map(PlatformConfig::parse).transpose()?; - #[cfg(target_arch = "x86_64")] - let mut sgx_epc: Option> = None; - #[cfg(target_arch = "x86_64")] - { - if let Some(sgx_epc_list) = &vm_params.sgx_epc { - warn!("SGX support is deprecated and will be removed in a future release."); - let mut sgx_epc_config_list = Vec::new(); - for item in sgx_epc_list.iter() { - let sgx_epc_config = SgxEpcConfig::parse(item)?; - sgx_epc_config_list.push(sgx_epc_config); - } - sgx_epc = Some(sgx_epc_config_list); - } - } - let mut numa: Option> = None; if let Some(numa_list) = &vm_params.numa { let mut numa_config_list = Vec::new(); @@ -3058,8 +2985,6 @@ impl VmConfig { pvmemcontrol, pvpanic: vm_params.pvpanic, iommu: false, // updated in VmConfig::validate() - #[cfg(target_arch = "x86_64")] - sgx_epc, numa, watchdog: vm_params.watchdog, #[cfg(feature = "guest_debug")] @@ -3189,8 +3114,6 @@ impl Clone for VmConfig { user_devices: self.user_devices.clone(), vdpa: self.vdpa.clone(), vsock: self.vsock.clone(), - #[cfg(target_arch = "x86_64")] - sgx_epc: self.sgx_epc.clone(), numa: self.numa.clone(), pci_segments: self.pci_segments.clone(), platform: self.platform.clone(), @@ -3976,8 +3899,6 @@ mod tests { pvmemcontrol: None, pvpanic: false, iommu: false, - #[cfg(target_arch = "x86_64")] - sgx_epc: None, numa: None, watchdog: false, #[cfg(feature = "guest_debug")] @@ -4119,8 +4040,6 @@ mod tests { cpus: None, distances: None, memory_zones: None, - #[cfg(target_arch = "x86_64")] - sgx_epc_sections: None, pci_segments: None, } } @@ -4192,8 +4111,6 @@ mod tests { pvmemcontrol: None, pvpanic: false, iommu: false, - #[cfg(target_arch = "x86_64")] - sgx_epc: None, numa: None, watchdog: false, #[cfg(feature = "guest_debug")] diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index 
0f4ac18cc8..7f542015ed 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -82,8 +82,6 @@ use crate::coredump::{ }; #[cfg(feature = "guest_debug")] use crate::gdb::{get_raw_tid, Debuggable, DebuggableError}; -#[cfg(target_arch = "x86_64")] -use crate::memory_manager::MemoryManager; use crate::seccomp_filters::{get_seccomp_filter, Thread}; #[cfg(target_arch = "x86_64")] use crate::vm::physical_bits; @@ -799,23 +797,14 @@ impl CpuManager { #[cfg(target_arch = "x86_64")] pub fn populate_cpuid( &mut self, - memory_manager: &Arc>, hypervisor: &Arc, #[cfg(feature = "tdx")] tdx: bool, ) -> Result<()> { - let sgx_epc_sections = memory_manager - .lock() - .unwrap() - .sgx_epc_region() - .as_ref() - .map(|sgx_epc_region| sgx_epc_region.epc_sections().values().cloned().collect()); - self.cpuid = { let phys_bits = physical_bits(hypervisor, self.config.max_phys_bits); arch::generate_common_cpuid( hypervisor, &arch::CpuidConfig { - sgx_epc_sections, phys_bits, kvm_hyperv: self.config.kvm_hyperv, #[cfg(feature = "tdx")] diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index dddfe9bd33..0378b7b18a 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -900,8 +900,6 @@ impl Vmm { false, Some(&vm_migration_config.memory_manager_data), existing_memory_files, - #[cfg(target_arch = "x86_64")] - None, ) .map_err(|e| { MigratableError::MigrateReceive(anyhow!( @@ -1135,7 +1133,6 @@ impl Vmm { arch::generate_common_cpuid( &hypervisor, &arch::CpuidConfig { - sgx_epc_sections: None, phys_bits, kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv, #[cfg(feature = "tdx")] @@ -1266,7 +1263,7 @@ impl Vmm { }; // We check the `CPUID` compatibility of between the source vm and destination, which is - // mostly about feature compatibility and "topology/sgx" leaves are not relevant. + // mostly about feature compatibility. 
let dest_cpuid = &{ let vm_config = &src_vm_config.lock().unwrap(); @@ -1274,7 +1271,6 @@ impl Vmm { arch::generate_common_cpuid( &self.hypervisor.clone(), &arch::CpuidConfig { - sgx_epc_sections: None, phys_bits, kvm_hyperv: vm_config.cpus.kvm_hyperv, #[cfg(feature = "tdx")] @@ -2428,8 +2424,6 @@ mod unit_tests { pvmemcontrol: None, pvpanic: false, iommu: false, - #[cfg(target_arch = "x86_64")] - sgx_epc: None, numa: None, watchdog: false, #[cfg(feature = "guest_debug")] diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index cdc5810445..461eb24b9a 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -19,16 +19,12 @@ use std::{ffi, result, thread}; use acpi_tables::{aml, Aml}; use anyhow::anyhow; -#[cfg(target_arch = "x86_64")] -use arch::x86_64::{SgxEpcRegion, SgxEpcSection}; use arch::RegionType; #[cfg(target_arch = "x86_64")] use devices::ioapic; #[cfg(target_arch = "aarch64")] use hypervisor::HypervisorVmError; use libc::_SC_NPROCESSORS_ONLN; -#[cfg(target_arch = "x86_64")] -use libc::{MAP_NORESERVE, MAP_POPULATE, MAP_SHARED, PROT_READ, PROT_WRITE}; use serde::{Deserialize, Serialize}; use thiserror::Error; use tracer::trace_scoped; @@ -54,8 +50,6 @@ use crate::coredump::{ CoredumpMemoryRegion, CoredumpMemoryRegions, DumpState, GuestDebuggableError, }; use crate::migration::url_to_path; -#[cfg(target_arch = "x86_64")] -use crate::vm_config::SgxEpcConfig; use crate::vm_config::{HotplugMethod, MemoryConfig, MemoryZoneConfig}; use crate::{GuestMemoryMmap, GuestRegionMmap, MEMORY_MANAGER_SNAPSHOT_ID}; @@ -68,9 +62,6 @@ const SNAPSHOT_FILENAME: &str = "memory-ranges"; #[cfg(target_arch = "x86_64")] const X86_64_IRQ_BASE: u32 = 5; -#[cfg(target_arch = "x86_64")] -const SGX_PAGE_SIZE: u64 = 1 << 12; - const HOTPLUG_COUNT: usize = 8; // Memory policy constants @@ -183,8 +174,6 @@ pub struct MemoryManager { hugepage_size: Option, prefault: bool, thp: bool, - #[cfg(target_arch = "x86_64")] - sgx_epc_region: Option, user_provided_zones: bool, snapshot_memory_ranges: MemoryRangeTable, memory_zones: MemoryZones, @@ -269,36 +258,6 @@ pub enum Error { #[error("Cannot create the system allocator")] CreateSystemAllocator, - /// Invalid SGX EPC section size - #[cfg(target_arch = "x86_64")] - #[error("Invalid SGX EPC section size")] - EpcSectionSizeInvalid, - - /// Failed allocating SGX EPC region - #[cfg(target_arch = "x86_64")] - #[error("Failed allocating SGX EPC region")] - SgxEpcRangeAllocation, - - /// Failed opening SGX virtual EPC device - #[cfg(target_arch = "x86_64")] - #[error("Failed opening SGX virtual EPC device")] - SgxVirtEpcOpen(#[source] io::Error), - - /// Failed setting the SGX virtual EPC section size - #[cfg(target_arch = "x86_64")] - #[error("Failed setting the SGX virtual EPC section size")] - SgxVirtEpcFileSetLen(#[source] io::Error), - - /// Failed opening SGX provisioning device - #[cfg(target_arch = "x86_64")] - #[error("Failed opening SGX provisioning device")] - SgxProvisionOpen(#[source] io::Error), - - /// Failed enabling SGX provisioning - #[cfg(target_arch = "x86_64")] - #[error("Failed enabling SGX provisioning")] - SgxEnableProvisioning(#[source] hypervisor::HypervisorVmError), - /// Failed creating a new MmapRegion instance. 
#[cfg(target_arch = "x86_64")] #[error("Failed creating a new MmapRegion instance")] @@ -1034,7 +993,6 @@ impl MemoryManager { #[cfg(feature = "tdx")] tdx_enabled: bool, restore_data: Option<&MemoryManagerSnapshotData>, existing_memory_files: Option>, - #[cfg(target_arch = "x86_64")] sgx_epc_config: Option>, ) -> Result>, Error> { trace_scoped!("MemoryManager::new"); @@ -1236,8 +1194,7 @@ impl MemoryManager { None }; - // If running on SGX the start of device area and RAM area may diverge but - // at this point they are next to each other. + // The start of device area and RAM area are placed next to each other. let end_of_ram_area = start_of_device_area.unchecked_sub(1); let ram_allocator = AddressAllocator::new(GuestAddress(0), start_of_device_area.0).unwrap(); @@ -1263,8 +1220,6 @@ impl MemoryManager { hugepages: config.hugepages, hugepage_size: config.hugepage_size, prefault: config.prefault, - #[cfg(target_arch = "x86_64")] - sgx_epc_region: None, user_provided_zones, snapshot_memory_ranges: MemoryRangeTable::default(), memory_zones, @@ -1279,11 +1234,6 @@ impl MemoryManager { thp: config.thp, }; - #[cfg(target_arch = "x86_64")] - if let Some(sgx_epc_config) = sgx_epc_config { - memory_manager.setup_sgx(sgx_epc_config)?; - } - Ok(Arc::new(Mutex::new(memory_manager))) } @@ -1311,8 +1261,6 @@ impl MemoryManager { false, Some(&mem_snapshot), None, - #[cfg(target_arch = "x86_64")] - None, )?; mm.lock() @@ -1976,121 +1924,6 @@ impl MemoryManager { self.virtio_mem_resize(id, virtio_mem_size) } - #[cfg(target_arch = "x86_64")] - pub fn setup_sgx(&mut self, sgx_epc_config: Vec) -> Result<(), Error> { - let file = OpenOptions::new() - .read(true) - .open("/dev/sgx_provision") - .map_err(Error::SgxProvisionOpen)?; - self.vm - .enable_sgx_attribute(file) - .map_err(Error::SgxEnableProvisioning)?; - - // Go over each EPC section and verify its size is a 4k multiple. At - // the same time, calculate the total size needed for the contiguous - // EPC region. - let mut epc_region_size = 0; - for epc_section in sgx_epc_config.iter() { - if epc_section.size == 0 { - return Err(Error::EpcSectionSizeInvalid); - } - if epc_section.size & (SGX_PAGE_SIZE - 1) != 0 { - return Err(Error::EpcSectionSizeInvalid); - } - - epc_region_size += epc_section.size; - } - - // Place the SGX EPC region on a 4k boundary between the RAM and the device area - let epc_region_start = - GuestAddress(self.start_of_device_area.0.div_ceil(SGX_PAGE_SIZE) * SGX_PAGE_SIZE); - - self.start_of_device_area = epc_region_start - .checked_add(epc_region_size) - .ok_or(Error::GuestAddressOverFlow)?; - - let mut sgx_epc_region = SgxEpcRegion::new(epc_region_start, epc_region_size as GuestUsize); - info!( - "SGX EPC region: 0x{:x} (0x{:x})", - epc_region_start.0, epc_region_size - ); - - // Each section can be memory mapped into the allocated region. - let mut epc_section_start = epc_region_start.raw_value(); - for epc_section in sgx_epc_config.iter() { - let file = OpenOptions::new() - .read(true) - .write(true) - .open("/dev/sgx_vepc") - .map_err(Error::SgxVirtEpcOpen)?; - - let prot = PROT_READ | PROT_WRITE; - let mut flags = MAP_NORESERVE | MAP_SHARED; - if epc_section.prefault { - flags |= MAP_POPULATE; - } - - // We can't use the vm-memory crate to perform the memory mapping - // here as it would try to ensure the size of the backing file is - // matching the size of the expected mapping. 
The /dev/sgx_vepc - // device does not work that way, it provides a file descriptor - // which is not matching the mapping size, as it's a just a way to - // let KVM know that an EPC section is being created for the guest. - // SAFETY: FFI call with correct arguments - let host_addr = unsafe { - libc::mmap( - std::ptr::null_mut(), - epc_section.size as usize, - prot, - flags, - file.as_raw_fd(), - 0, - ) - }; - - if host_addr == libc::MAP_FAILED { - error!( - "Could not add SGX EPC section (size 0x{:x})", - epc_section.size - ); - return Err(Error::SgxEpcRangeAllocation); - } - - info!( - "Adding SGX EPC section: 0x{:x} (0x{:x})", - epc_section_start, epc_section.size - ); - - let _mem_slot = self.create_userspace_mapping( - epc_section_start, - epc_section.size, - host_addr as u64, - false, - false, - false, - )?; - - sgx_epc_region.insert( - epc_section.id.clone(), - SgxEpcSection::new( - GuestAddress(epc_section_start), - epc_section.size as GuestUsize, - ), - ); - - epc_section_start += epc_section.size; - } - - self.sgx_epc_region = Some(sgx_epc_region); - - Ok(()) - } - - #[cfg(target_arch = "x86_64")] - pub fn sgx_epc_region(&self) -> &Option { - &self.sgx_epc_region - } - pub fn is_hardlink(f: &File) -> bool { let mut stat = std::mem::MaybeUninit::::uninit(); // SAFETY: FFI call with correct arguments @@ -2642,34 +2475,6 @@ impl Aml for MemoryManager { ) .to_aml_bytes(sink); } - - #[cfg(target_arch = "x86_64")] - { - if let Some(sgx_epc_region) = &self.sgx_epc_region { - let min = sgx_epc_region.start().raw_value(); - let max = min + sgx_epc_region.size() - 1; - // SGX EPC region - aml::Device::new( - "_SB_.EPC_".into(), - vec![ - &aml::Name::new("_HID".into(), &aml::EISAName::new("INT0E0C")), - // QWORD describing the EPC region start and size - &aml::Name::new( - "_CRS".into(), - &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory( - aml::AddressSpaceCacheable::NotCacheable, - true, - min, - max, - None, - )]), - ), - &aml::Method::new("_STA".into(), 0, false, vec![&aml::Return::new(&0xfu8)]), - ], - ) - .to_aml_bytes(sink); - } - } } } diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index af1ddaaa46..3092c32367 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -606,7 +606,6 @@ impl Vm { .lock() .unwrap() .populate_cpuid( - &memory_manager, &hypervisor, #[cfg(feature = "tdx")] tdx_enabled, @@ -971,24 +970,6 @@ impl Vm { } } - #[cfg(target_arch = "x86_64")] - if let Some(sgx_epc_sections) = &config.sgx_epc_sections { - if let Some(sgx_epc_region) = mm.sgx_epc_region() { - let mm_sections = sgx_epc_region.epc_sections(); - for sgx_epc_section in sgx_epc_sections.iter() { - if let Some(mm_section) = mm_sections.get(sgx_epc_section) { - node.sgx_epc_sections.push(mm_section.clone()); - } else { - error!("Unknown SGX EPC section '{}'", sgx_epc_section); - return Err(Error::InvalidNumaConfig); - } - } - } else { - error!("Missing SGX EPC region"); - return Err(Error::InvalidNumaConfig); - } - } - numa_nodes.insert(config.guest_numa_id, node); } } @@ -1056,9 +1037,6 @@ impl Vm { ) .map_err(Error::MemoryManager)? } else { - #[cfg(target_arch = "x86_64")] - let sgx_epc_config = vm_config.lock().unwrap().sgx_epc.clone(); - MemoryManager::new( vm.clone(), &vm_config.lock().unwrap().memory.clone(), @@ -1068,8 +1046,6 @@ impl Vm { tdx_enabled, None, None, - #[cfg(target_arch = "x86_64")] - sgx_epc_config, ) .map_err(Error::MemoryManager)? 
}; @@ -1420,13 +1396,6 @@ impl Vm { let boot_vcpus = self.cpu_manager.lock().unwrap().boot_vcpus(); let rsdp_addr = Some(rsdp_addr); - let sgx_epc_region = self - .memory_manager - .lock() - .unwrap() - .sgx_epc_region() - .as_ref() - .cloned(); let serial_number = self .config @@ -1466,7 +1435,6 @@ impl Vm { boot_vcpus, entry_addr.setup_header, rsdp_addr, - sgx_epc_region, serial_number.as_deref(), uuid.as_deref(), oem_strings.as_deref(), @@ -2917,7 +2885,6 @@ impl Snapshottable for Vm { arch::generate_common_cpuid( &self.hypervisor, &arch::CpuidConfig { - sgx_epc_sections: None, phys_bits, kvm_hyperv: self.config.lock().unwrap().cpus.kvm_hyperv, #[cfg(feature = "tdx")] diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index cf1f61e05c..9c149d05f1 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -671,16 +671,6 @@ impl Default for IvshmemConfig { } } -#[cfg(target_arch = "x86_64")] -#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] -pub struct SgxEpcConfig { - pub id: String, - #[serde(default)] - pub size: u64, - #[serde(default)] - pub prefault: bool, -} - #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] pub struct NumaDistance { #[serde(default)] @@ -699,9 +689,6 @@ pub struct NumaConfig { pub distances: Option>, #[serde(default)] pub memory_zones: Option>, - #[cfg(target_arch = "x86_64")] - #[serde(default)] - pub sgx_epc_sections: Option>, #[serde(default)] pub pci_segments: Option>, } @@ -941,8 +928,6 @@ pub struct VmConfig { pub pvpanic: bool, #[serde(default)] pub iommu: bool, - #[cfg(target_arch = "x86_64")] - pub sgx_epc: Option>, pub numa: Option>, #[serde(default)] pub watchdog: bool, From 706b56e97d0631816490d3cdd502dce7ccecb45a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 5 Sep 2025 23:01:57 +0000 Subject: [PATCH 152/294] build: Bump async-trait from 0.1.88 to 0.1.89 Bumps [async-trait](https://github.com/dtolnay/async-trait) from 0.1.88 to 0.1.89. - [Release notes](https://github.com/dtolnay/async-trait/releases) - [Commits](https://github.com/dtolnay/async-trait/compare/0.1.88...0.1.89) --- updated-dependencies: - dependency-name: async-trait dependency-version: 0.1.89 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f985a18180..0b896638f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -243,9 +243,9 @@ checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" [[package]] name = "async-trait" -version = "0.1.88" +version = "0.1.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", From 3f5c7197057d92560097b7b6c3e39523c189528c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 5 Sep 2025 23:18:58 +0000 Subject: [PATCH 153/294] build: Bump crate-ci/typos from 1.36.1 to 1.36.2 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.36.1 to 1.36.2. 
- [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.36.1...v1.36.2) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-version: 1.36.2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/quality.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 24ee35990c..f767909a27 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -167,4 +167,4 @@ jobs: steps: - uses: actions/checkout@v5 # Executes "typos ." - - uses: crate-ci/typos@v1.36.1 + - uses: crate-ci/typos@v1.36.2 From 47cff7c37fe9ed1dbb232d749e64280b9c031029 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Sat, 6 Sep 2025 13:01:23 +0200 Subject: [PATCH 154/294] vmm: allow TCGETS2/TCSETS2 where TCGETS/TCSETS are These are now used by Cloud Hypervisor when linked with Glibc 2.42. These values should be correct for all currently supported Cloud Hypervisor platforms, although they are not for all Linux platforms. Closes: https://github.com/cloud-hypervisor/cloud-hypervisor/issues/7276 Signed-off-by: Alyssa Ross --- vmm/src/seccomp_filters.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index 1f0a6a47e4..46e38f9c8e 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -42,7 +42,9 @@ macro_rules! or { // See include/uapi/asm-generic/ioctls.h in the kernel code. const TCGETS: u64 = 0x5401; +const TCGETS2: u64 = 0x802c_542a; const TCSETS: u64 = 0x5402; +const TCSETS2: u64 = 0x402c_542b; const TIOCSCTTY: u64 = 0x540E; const TIOCGPGRP: u64 = 0x540F; const TIOCSPGRP: u64 = 0x5410; @@ -311,7 +313,9 @@ fn create_vmm_ioctl_seccomp_rule_common( and![Cond::new(1, ArgLen::Dword, Eq, SIOCSIFMTU)?], and![Cond::new(1, ArgLen::Dword, Eq, SIOCSIFNETMASK)?], and![Cond::new(1, ArgLen::Dword, Eq, TCSETS)?], + and![Cond::new(1, ArgLen::Dword, Eq, TCSETS2)?], and![Cond::new(1, ArgLen::Dword, Eq, TCGETS)?], + and![Cond::new(1, ArgLen::Dword, Eq, TCGETS2)?], and![Cond::new(1, ArgLen::Dword, Eq, TIOCGPGRP)?], and![Cond::new(1, ArgLen::Dword, Eq, TIOCGPTPEER)?], and![Cond::new(1, ArgLen::Dword, Eq, TIOCGWINSZ)?], @@ -487,7 +491,9 @@ fn create_api_ioctl_seccomp_rule() -> Result, BackendError> { fn create_signal_handler_ioctl_seccomp_rule() -> Result, BackendError> { Ok(or![ and![Cond::new(1, ArgLen::Dword, Eq, TCGETS)?], + and![Cond::new(1, ArgLen::Dword, Eq, TCGETS2)?], and![Cond::new(1, ArgLen::Dword, Eq, TCSETS)?], + and![Cond::new(1, ArgLen::Dword, Eq, TCSETS2)?], and![Cond::new(1, ArgLen::Dword, Eq, TIOCGWINSZ)?], ]) } From 57bc78da4f185efab0bb580f74c824de1855c46e Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Fri, 5 Sep 2025 20:32:36 +0000 Subject: [PATCH 155/294] arch: x86_64: make MAX_SUPPORTED_CPUS_LEGACY public Signed-off-by: Peter Oskolkov --- arch/src/x86_64/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index 22d1a1cfea..83cb0876c0 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -34,7 +34,7 @@ pub mod tdx; // While modern architectures support more than 255 CPUs via x2APIC, // legacy devices such as mptable support at most 254 CPUs. 
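// Editorial aside, not part of the diff above or below: the constant being
// made public here feeds the vCPU-limit change later in this series. A
// minimal sketch of the decision it supports, assuming the helpers that
// change introduces (VmConfig::max_apic_id() and the VM's enable_x2apic_api()):

const MAX_SUPPORTED_CPUS_LEGACY: u32 = 254;

fn needs_x2apic_api(max_apic_id: u32) -> bool {
    // mptable/xAPIC can only describe APIC IDs up to 254; a guest whose
    // topology produces a larger APIC ID must be enumerated through x2APIC,
    // so the KVM x2APIC API has to be turned on (see enable_x2apic_api()
    // in the next patch).
    max_apic_id > MAX_SUPPORTED_CPUS_LEGACY
}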
-pub(crate) const MAX_SUPPORTED_CPUS_LEGACY: u32 = 254; +pub const MAX_SUPPORTED_CPUS_LEGACY: u32 = 254; // CPUID feature bits #[cfg(feature = "kvm")] From 05d222f0eb55c0a7ad417e96eb7f791a2989c0eb Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Fri, 5 Sep 2025 20:36:25 +0000 Subject: [PATCH 156/294] vmm: raise the (v)CPU limit on kvm/x86_64 Raise the max number of supported (v)CPUs on kvm x86_64 hosts to 8192 (the max allowed value of CONFIG_NR_CPUS in the Linux kernel). Other platfroms keep their existing CPU limits pending further development and testing. The change has been tested on Intel and AMD hosts. Signed-off-by: Barret Rhoden Signed-off-by: Neel Natu Signed-off-by: Ofir Weisse Signed-off-by: Peter Oskolkov --- vmm/src/config.rs | 30 +++++++++++++++++---- vmm/src/cpu.rs | 56 +++++++++++++++++++++------------------ vmm/src/device_manager.rs | 2 +- vmm/src/lib.rs | 9 ++++++- vmm/src/vm.rs | 13 ++++++--- vmm/src/vm_config.rs | 32 +++++++++++++++------- 6 files changed, 97 insertions(+), 45 deletions(-) diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 29a2644b74..b66c71b8c6 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -27,6 +27,11 @@ use crate::vm_config::*; const MAX_NUM_PCI_SEGMENTS: u16 = 96; const MAX_IOMMU_ADDRESS_WIDTH_BITS: u8 = 64; +#[cfg(all(feature = "kvm", target_arch = "x86_64"))] +const MAX_SUPPORTED_CPUS: u32 = 8192; +#[cfg(not(all(feature = "kvm", target_arch = "x86_64")))] +const MAX_SUPPORTED_CPUS: u32 = 255; + /// Errors associated with VM configuration parameters. #[derive(Debug, Error)] pub enum Error { @@ -182,6 +187,9 @@ pub enum ValidationError { /// Max is less than boot #[error("Max CPUs lower than boot CPUs")] CpusMaxLowerThanBoot, + /// Too many CPUs. + #[error("Too many CPUs: specified {0} but {MAX_SUPPORTED_CPUS} is the limit")] + TooManyCpus(u32 /* specified CPUs */), /// Missing file value for debug-console #[cfg(target_arch = "x86_64")] #[error("Path missing when using file mode for debug console")] @@ -586,11 +594,11 @@ impl CpusConfig { .add("features"); parser.parse(cpus).map_err(Error::ParseCpus)?; - let boot_vcpus: u8 = parser + let boot_vcpus: u32 = parser .convert("boot") .map_err(Error::ParseCpus)? .unwrap_or(DEFAULT_VCPUS); - let max_vcpus: u8 = parser + let max_vcpus: u32 = parser .convert("max") .map_err(Error::ParseCpus)? .unwrap_or(boot_vcpus); @@ -605,7 +613,7 @@ impl CpusConfig { .map_err(Error::ParseCpus)? .unwrap_or(DEFAULT_MAX_PHYS_BITS); let affinity = parser - .convert::>>("affinity") + .convert::>>("affinity") .map_err(Error::ParseCpus)? .map(|v| { v.0.iter() @@ -2147,7 +2155,7 @@ impl NumaConfig { let cpus = parser .convert::("cpus") .map_err(Error::ParseNuma)? - .map(|v| v.0.iter().map(|e| *e as u8).collect()); + .map(|v| v.0.iter().map(|e| *e as u32).collect()); let distances = parser .convert::>("distances") .map_err(Error::ParseNuma)? @@ -2523,6 +2531,15 @@ impl VmConfig { return Err(ValidationError::CpusMaxLowerThanBoot); } + if self.cpus.max_vcpus > MAX_SUPPORTED_CPUS { + // Note: historically, Cloud Hypervisor did not support more than 255(254 on x64) + // vCPUs: self.cpus.max_vcpus was of type u8, so 255 was the maximum; + // on x86_64, the legacy mptable/apic was limited to 254 CPUs. + // + // Now the limit is lifted on x86_64 targets. Other targests/archs: TBD. 
+ return Err(ValidationError::TooManyCpus(self.cpus.max_vcpus)); + } + if let Some(rate_limit_groups) = &self.rate_limit_groups { for rate_limit_group in rate_limit_groups { rate_limit_group.validate(self)?; @@ -2614,7 +2631,10 @@ impl VmConfig { return Err(ValidationError::CpuTopologyDiesPerPackage); } - let total = t.threads_per_core * t.cores_per_die * t.dies_per_package * t.packages; + let total: u32 = (t.threads_per_core as u32) + * (t.cores_per_die as u32) + * (t.dies_per_package as u32) + * (t.packages as u32); if total != self.cpus.max_vcpus { return Err(ValidationError::CpuTopologyCount); } diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index 7f542015ed..00d2468f39 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -196,8 +196,8 @@ pub enum Error { #[error("Error setting up AMX")] AmxEnable(#[source] anyhow::Error), - #[error("Maximum number of vCPUs exceeds host limit")] - MaximumVcpusExceeded, + #[error("Maximum number of vCPUs {0} exceeds host limit {1}")] + MaximumVcpusExceeded(u32, u32), #[cfg(feature = "sev_snp")] #[error("Failed to set sev control register")] @@ -698,12 +698,16 @@ impl CpuManager { numa_nodes: &NumaNodes, #[cfg(feature = "sev_snp")] sev_snp_enabled: bool, ) -> Result>> { - if u32::from(config.max_vcpus) > hypervisor.get_max_vcpus() { - return Err(Error::MaximumVcpusExceeded); + if config.max_vcpus > hypervisor.get_max_vcpus() { + return Err(Error::MaximumVcpusExceeded( + config.max_vcpus, + hypervisor.get_max_vcpus(), + )); } - let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus)); - vcpu_states.resize_with(usize::from(config.max_vcpus), VcpuState::default); + let max_vcpus = usize::try_from(config.max_vcpus).unwrap(); + let mut vcpu_states = Vec::with_capacity(max_vcpus); + vcpu_states.resize_with(max_vcpus, VcpuState::default); let hypervisor_type = hypervisor.hypervisor_type(); #[cfg(target_arch = "x86_64")] let cpu_vendor = hypervisor.get_cpu_vendor(); @@ -755,7 +759,7 @@ impl CpuManager { let affinity = if let Some(cpu_affinity) = config.affinity.as_ref() { cpu_affinity .iter() - .map(|a| (a.vcpu as u32, a.host_cpus.clone())) + .map(|a| (a.vcpu, a.host_cpus.clone())) .collect() } else { BTreeMap::new() @@ -781,7 +785,7 @@ impl CpuManager { #[cfg(feature = "guest_debug")] vm_debug_evt, selected_cpu: 0, - vcpus: Vec::with_capacity(usize::from(config.max_vcpus)), + vcpus: Vec::with_capacity(max_vcpus), seccomp_action, vm_ops, acpi_address: None, @@ -895,10 +899,10 @@ impl CpuManager { }, |t| { ( - t.threads_per_core.into(), - t.cores_per_die.into(), - t.dies_per_package.into(), - t.packages.into(), + t.threads_per_core, + t.cores_per_die, + t.dies_per_package, + t.packages, ) }, ); @@ -934,7 +938,7 @@ impl CpuManager { self.present_vcpus() ); - if desired_vcpus > self.config.max_vcpus as u32 { + if desired_vcpus > self.config.max_vcpus { return Err(Error::DesiredVCpuCountExceedsMax); } @@ -1245,7 +1249,7 @@ impl CpuManager { inserting: bool, paused: Option, ) -> Result<()> { - if desired_vcpus > self.config.max_vcpus as u32 { + if desired_vcpus > self.config.max_vcpus { return Err(Error::DesiredVCpuCountExceedsMax); } @@ -1418,11 +1422,11 @@ impl CpuManager { } pub fn boot_vcpus(&self) -> u32 { - self.config.boot_vcpus as u32 + self.config.boot_vcpus } pub fn max_vcpus(&self) -> u32 { - self.config.max_vcpus as u32 + self.config.max_vcpus } #[cfg(target_arch = "x86_64")] @@ -1456,10 +1460,10 @@ impl CpuManager { pub fn get_vcpu_topology(&self) -> Option<(u16, u16, u16, u16)> { self.config.topology.clone().map(|t| { ( - 
t.threads_per_core.into(), - t.cores_per_die.into(), - t.dies_per_package.into(), - t.packages.into(), + t.threads_per_core, + t.cores_per_die, + t.dies_per_package, + t.packages, ) }) } @@ -1475,7 +1479,7 @@ impl CpuManager { { madt.write(36, arch::layout::APIC_START.0); - for cpu in 0..self.config.max_vcpus as u32 { + for cpu in 0..self.config.max_vcpus { let x2apic_id = get_x2apic_id(cpu, self.get_vcpu_topology()); let lapic = LocalX2Apic { @@ -1483,7 +1487,7 @@ impl CpuManager { length: 16, processor_id: cpu, apic_id: x2apic_id, - flags: if cpu < self.config.boot_vcpus as u32 { + flags: if cpu < self.config.boot_vcpus { 1 << MADT_CPU_ENABLE_FLAG } else { 0 @@ -1535,8 +1539,8 @@ impl CpuManager { r#type: acpi::ACPI_APIC_GENERIC_CPU_INTERFACE, length: 80, reserved0: 0, - cpu_interface_number: cpu as u32, - uid: cpu as u32, + cpu_interface_number: cpu, + uid: cpu, flags: 1, parking_version: 0, performance_interrupt: 0, @@ -2274,7 +2278,7 @@ impl Aml for CpuManager { let uid = aml::Name::new("_CID".into(), &aml::EISAName::new("PNP0A05")); // Bundle methods together under a common object let methods = CpuMethods { - max_vcpus: self.config.max_vcpus as u32, + max_vcpus: self.config.max_vcpus, dynamic: self.dynamic, }; let mut cpu_data_inner: Vec<&dyn Aml> = vec![&hid, &uid, &methods]; @@ -2282,7 +2286,7 @@ impl Aml for CpuManager { #[cfg(target_arch = "x86_64")] let topology = self.get_vcpu_topology(); let mut cpu_devices = Vec::new(); - for cpu_id in 0..(self.config.max_vcpus as u32) { + for cpu_id in 0..self.config.max_vcpus { let proximity_domain = *self.proximity_domain_per_cpu.get(&cpu_id).unwrap_or(&0); let cpu_device = Cpu { cpu_id, diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index c097a25974..0ad2ab7ca4 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -1694,7 +1694,7 @@ impl DeviceManager { ) -> DeviceManagerResult>> { let interrupt_controller: Arc> = Arc::new(Mutex::new( gic::Gic::new( - self.config.lock().unwrap().cpus.boot_vcpus as u32, + self.config.lock().unwrap().cpus.boot_vcpus, Arc::clone(&self.msi_interrupt_manager), self.address_manager.vm.clone(), ) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 0378b7b18a..cb430728b8 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -27,6 +27,8 @@ use anyhow::anyhow; #[cfg(feature = "dbus_api")] use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; use api::http::HttpApiHandle; +#[cfg(all(feature = "kvm", target_arch = "x86_64"))] +use arch::x86_64::MAX_SUPPORTED_CPUS_LEGACY; use console_devices::{pre_create_console_devices, ConsoleInfo}; use landlock::LandlockError; use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW}; @@ -888,6 +890,11 @@ impl Vmm { )) })?; + #[cfg(all(feature = "kvm", target_arch = "x86_64"))] + if config.lock().unwrap().max_apic_id() > MAX_SUPPORTED_CPUS_LEGACY { + vm.enable_x2apic_api().unwrap(); + } + let phys_bits = vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits); @@ -1822,7 +1829,7 @@ impl RequestHandler for Vmm { } else { let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); if let Some(desired_vcpus) = desired_vcpus { - config.cpus.boot_vcpus = desired_vcpus.try_into().unwrap(); + config.cpus.boot_vcpus = desired_vcpus; } if let Some(desired_ram) = desired_ram { config.memory.size = desired_ram; diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 3092c32367..d5e1e808d6 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -29,6 +29,8 @@ use anyhow::anyhow; use arch::layout::{KVM_IDENTITY_MAP_START, 
KVM_TSS_START}; #[cfg(feature = "tdx")] use arch::x86_64::tdx::TdvfSection; +#[cfg(all(feature = "kvm", target_arch = "x86_64"))] +use arch::x86_64::MAX_SUPPORTED_CPUS_LEGACY; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use arch::PciSpaceInfo; use arch::{get_host_cpu_phys_bits, EntryPoint, NumaNode, NumaNodes}; @@ -944,7 +946,7 @@ impl Vm { } if let Some(cpus) = &config.cpus { - node.cpus.extend(cpus.iter().map(|cpu| *cpu as u32)); + node.cpus.extend(cpus); } if let Some(pci_segments) = &config.pci_segments { @@ -1022,6 +1024,11 @@ impl Vm { vm_config.lock().unwrap().memory.total_size(), )?; + #[cfg(all(feature = "kvm", target_arch = "x86_64"))] + if vm_config.lock().unwrap().max_apic_id() > MAX_SUPPORTED_CPUS_LEGACY { + vm.enable_x2apic_api().unwrap(); + } + let phys_bits = physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits); let memory_manager = if let Some(snapshot) = @@ -1655,7 +1662,7 @@ impl Vm { .notify_hotplug(AcpiNotificationFlags::CPU_DEVICES_CHANGED) .map_err(Error::DeviceManager)?; } - self.config.lock().unwrap().cpus.boot_vcpus = desired_vcpus.try_into().unwrap(); + self.config.lock().unwrap().cpus.boot_vcpus = desired_vcpus; } if let Some(desired_memory) = desired_memory { @@ -2709,7 +2716,7 @@ impl Vm { &mut self, destination_url: &str, ) -> std::result::Result { - let nr_cpus = self.config.lock().unwrap().cpus.boot_vcpus as u32; + let nr_cpus = self.config.lock().unwrap().cpus.boot_vcpus; let elf_note_size = self.get_note_size(NoteDescType::ElfAndVmm, nr_cpus) as isize; let mut elf_phdr_num = 1; let elf_sh_info = 0; diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index 9c149d05f1..d07da3c44f 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -26,7 +26,7 @@ pub(crate) trait ApplyLandlock { #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] pub struct CpuAffinity { - pub vcpu: u8, + pub vcpu: u32, pub host_cpus: Vec, } @@ -39,10 +39,10 @@ pub struct CpuFeatures { #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] pub struct CpuTopology { - pub threads_per_core: u8, - pub cores_per_die: u8, - pub dies_per_package: u8, - pub packages: u8, + pub threads_per_core: u16, + pub cores_per_die: u16, + pub dies_per_package: u16, + pub packages: u16, } // When booting with PVH boot the maximum physical addressable size @@ -56,8 +56,8 @@ pub fn default_cpuconfig_max_phys_bits() -> u8 { #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] pub struct CpusConfig { - pub boot_vcpus: u8, - pub max_vcpus: u8, + pub boot_vcpus: u32, + pub max_vcpus: u32, #[serde(default)] pub topology: Option, #[serde(default)] @@ -70,7 +70,7 @@ pub struct CpusConfig { pub features: CpuFeatures, } -pub const DEFAULT_VCPUS: u8 = 1; +pub const DEFAULT_VCPUS: u32 = 1; impl Default for CpusConfig { fn default() -> Self { @@ -684,7 +684,7 @@ pub struct NumaConfig { #[serde(default)] pub guest_numa_id: u32, #[serde(default)] - pub cpus: Option>, + pub cpus: Option>, #[serde(default)] pub distances: Option>, #[serde(default)] @@ -1035,4 +1035,18 @@ impl VmConfig { Ok(()) } + + #[cfg(all(feature = "kvm", target_arch = "x86_64"))] + pub(crate) fn max_apic_id(&self) -> u32 { + if let Some(topology) = &self.cpus.topology { + arch::x86_64::get_max_x2apic_id(( + topology.threads_per_core, + topology.cores_per_die, + topology.dies_per_package, + topology.packages, + )) + } else { + self.cpus.max_vcpus + } + } } From 3259234e58e509e5c632527086e316973205e85c Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Mon, 8 Sep 2025 16:07:47 
+0200 Subject: [PATCH 157/294] vmm: drop unnecessary copies path_beneath_rules() just needs the paths given to it to be AsRef, so there's no need to create new PathBufs for it. Signed-off-by: Alyssa Ross --- vmm/src/landlock.rs | 8 ++++---- vmm/src/vm_config.rs | 45 ++++++++++++++++++++++---------------------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/vmm/src/landlock.rs b/vmm/src/landlock.rs index 3defeaefbd..5f139e5c1c 100644 --- a/vmm/src/landlock.rs +++ b/vmm/src/landlock.rs @@ -4,7 +4,7 @@ use std::convert::TryFrom; use std::io::Error as IoError; -use std::path::PathBuf; +use std::path::Path; #[cfg(test)] use landlock::make_bitflags; @@ -87,13 +87,13 @@ impl Landlock { pub(crate) fn add_rule( &mut self, - path: PathBuf, + path: &Path, access: BitFlags, ) -> Result<(), LandlockError> { // path_beneath_rules in landlock crate handles file and directory access rules. // Incoming path/s are passed to path_beneath_rules, so that we don't // have to worry about the type of the path. - let paths = vec![path.clone()]; + let paths = vec![&path]; let path_beneath_rules = path_beneath_rules(paths, access); self.ruleset .as_mut() @@ -104,7 +104,7 @@ impl Landlock { pub(crate) fn add_rule_with_access( &mut self, - path: PathBuf, + path: &Path, access: &str, ) -> Result<(), LandlockError> { self.add_rule(path, LandlockAccess::try_from(access)?.access)?; diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index d07da3c44f..38c0a0ba40 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // use std::net::{IpAddr, Ipv4Addr}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; #[cfg(feature = "fw_cfg")] use std::str::FromStr; use std::{fs, result}; @@ -159,7 +159,7 @@ pub struct MemoryZoneConfig { impl ApplyLandlock for MemoryZoneConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { if let Some(file) = &self.file { - landlock.add_rule_with_access(file.to_path_buf(), "rw")?; + landlock.add_rule_with_access(file, "rw")?; } Ok(()) } @@ -281,7 +281,7 @@ pub struct DiskConfig { impl ApplyLandlock for DiskConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { if let Some(path) = &self.path { - landlock.add_rule_with_access(path.to_path_buf(), "rw")?; + landlock.add_rule_with_access(path, "rw")?; } Ok(()) } @@ -425,7 +425,7 @@ impl Default for RngConfig { impl ApplyLandlock for RngConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { // Rng Path only need read access - landlock.add_rule_with_access(self.src.to_path_buf(), "r")?; + landlock.add_rule_with_access(&self.src, "r")?; Ok(()) } } @@ -469,7 +469,7 @@ pub fn default_fsconfig_queue_size() -> u16 { impl ApplyLandlock for FsConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { - landlock.add_rule_with_access(self.socket.to_path_buf(), "rw")?; + landlock.add_rule_with_access(&self.socket, "rw")?; Ok(()) } } @@ -492,7 +492,7 @@ pub struct PmemConfig { impl ApplyLandlock for PmemConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { let access = if self.discard_writes { "r" } else { "rw" }; - landlock.add_rule_with_access(self.file.to_path_buf(), access)?; + landlock.add_rule_with_access(&self.file, access)?; Ok(()) } } @@ -524,10 +524,10 @@ pub fn default_consoleconfig_file() -> Option { impl ApplyLandlock for ConsoleConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { if let Some(file) = 
&self.file { - landlock.add_rule_with_access(file.to_path_buf(), "rw")?; + landlock.add_rule_with_access(file, "rw")?; } if let Some(socket) = &self.socket { - landlock.add_rule_with_access(socket.to_path_buf(), "rw")?; + landlock.add_rule_with_access(socket, "rw")?; } Ok(()) } @@ -557,7 +557,7 @@ impl Default for DebugConsoleConfig { impl ApplyLandlock for DebugConsoleConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { if let Some(file) = &self.file { - landlock.add_rule_with_access(file.to_path_buf(), "rw")?; + landlock.add_rule_with_access(file, "rw")?; } Ok(()) } @@ -585,8 +585,9 @@ impl ApplyLandlock for DeviceConfig { .to_str() .ok_or(LandlockError::InvalidPath)?; - let vfio_group_path = "/dev/vfio/".to_owned() + iommu_group_str; - landlock.add_rule_with_access(vfio_group_path.into(), "rw")?; + let mut vfio_group_path = PathBuf::from("/dev/vfio"); + vfio_group_path.push(iommu_group_str); + landlock.add_rule_with_access(&vfio_group_path, "rw")?; Ok(()) } @@ -603,7 +604,7 @@ pub struct UserDeviceConfig { impl ApplyLandlock for UserDeviceConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { - landlock.add_rule_with_access(self.socket.to_path_buf(), "rw")?; + landlock.add_rule_with_access(&self.socket, "rw")?; Ok(()) } } @@ -627,7 +628,7 @@ pub fn default_vdpaconfig_num_queues() -> usize { impl ApplyLandlock for VdpaConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { - landlock.add_rule_with_access(self.path.to_path_buf(), "rw")?; + landlock.add_rule_with_access(&self.path, "rw")?; Ok(()) } } @@ -646,7 +647,7 @@ pub struct VsockConfig { impl ApplyLandlock for VsockConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { - landlock.add_rule_with_access(self.socket.to_path_buf(), "rw")?; + landlock.add_rule_with_access(&self.socket, "rw")?; Ok(()) } } @@ -832,20 +833,20 @@ impl ApplyLandlock for PayloadConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { // Payload only needs read access if let Some(firmware) = &self.firmware { - landlock.add_rule_with_access(firmware.to_path_buf(), "r")?; + landlock.add_rule_with_access(firmware, "r")?; } if let Some(kernel) = &self.kernel { - landlock.add_rule_with_access(kernel.to_path_buf(), "r")?; + landlock.add_rule_with_access(kernel, "r")?; } if let Some(initramfs) = &self.initramfs { - landlock.add_rule_with_access(initramfs.to_path_buf(), "r")?; + landlock.add_rule_with_access(initramfs, "r")?; } #[cfg(feature = "igvm")] if let Some(igvm) = &self.igvm { - landlock.add_rule_with_access(igvm.to_path_buf(), "r")?; + landlock.add_rule_with_access(igvm, "r")?; } Ok(()) @@ -877,7 +878,7 @@ pub struct TpmConfig { impl ApplyLandlock for TpmConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { - landlock.add_rule_with_access(self.socket.to_path_buf(), "rw")?; + landlock.add_rule_with_access(&self.socket, "rw")?; Ok(()) } } @@ -890,7 +891,7 @@ pub struct LandlockConfig { impl ApplyLandlock for LandlockConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { - landlock.add_rule_with_access(self.path.to_path_buf(), self.access.clone().as_str())?; + landlock.add_rule_with_access(&self.path, self.access.clone().as_str())?; Ok(()) } } @@ -990,7 +991,7 @@ impl VmConfig { } if let Some(devices) = &self.devices { - landlock.add_rule_with_access("/dev/vfio/vfio".into(), "rw")?; + landlock.add_rule_with_access(Path::new("/dev/vfio/vfio"), "rw")?; for device in 
devices.iter() { device.apply_landlock(&mut landlock)?; @@ -1022,7 +1023,7 @@ impl VmConfig { } if self.net.is_some() { - landlock.add_rule_with_access("/dev/net/tun".into(), "rw")?; + landlock.add_rule_with_access(Path::new("/dev/net/tun"), "rw")?; } if let Some(landlock_rules) = &self.landlock_rules { From 08b197bbc1e2ae2ac06c312dfbe9ddc5c704f6d5 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Tue, 9 Sep 2025 10:21:38 +0200 Subject: [PATCH 158/294] vmm: fix vsock with landlock Without write access to the directory the socket will go in, it's not possible to create the socket. I've tested outgoing connections from the VM, and they don't seem to need read permissions on that directory to connect to a socket on the host. Fixes: b3e5738b4 ("vmm: Introduce ApplyLandlock trait") Signed-off-by: Alyssa Ross --- vmm/src/vm_config.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index 38c0a0ba40..aff0d95f35 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -647,7 +647,12 @@ pub struct VsockConfig { impl ApplyLandlock for VsockConfig { fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { + if let Some(parent) = self.socket.parent() { + landlock.add_rule_with_access(parent, "w")?; + } + landlock.add_rule_with_access(&self.socket, "rw")?; + Ok(()) } } From e7e850bbdd4b995f65876b588f617204c6ea952b Mon Sep 17 00:00:00 2001 From: Muminul Islam Date: Mon, 8 Sep 2025 16:04:40 -0700 Subject: [PATCH 159/294] tests: enable more test cases for MSHV MSHV now supports movable pages i.e VA backed guest. Also with more features and stability in the MSHV Kernel drives MSHV now supports more test scenario. This patch enables more integration test cases. Signed-off-by: Muminul Islam --- tests/integration.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/integration.rs b/tests/integration.rs index 94f79dac6d..45662c786c 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -2849,7 +2849,6 @@ mod common_parallel { } #[test] - #[cfg(not(feature = "mshv"))] fn test_user_defined_memory_regions() { let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string()); let guest = Guest::new(Box::new(focal)); @@ -2917,7 +2916,6 @@ mod common_parallel { } #[test] - #[cfg(not(feature = "mshv"))] fn test_guest_numa_nodes() { _test_guest_numa_nodes(false); } @@ -3914,13 +3912,11 @@ mod common_parallel { } #[test] - #[cfg(not(feature = "mshv"))] fn test_virtio_fs_multi_segment_hotplug() { _test_virtio_fs(&prepare_virtiofsd, true, Some(15)) } #[test] - #[cfg(not(feature = "mshv"))] fn test_virtio_fs_multi_segment() { _test_virtio_fs(&prepare_virtiofsd, false, Some(15)) } @@ -5107,7 +5103,6 @@ mod common_parallel { } #[test] - #[cfg(not(feature = "mshv"))] fn test_virtio_mem() { let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string()); let guest = Guest::new(Box::new(focal)); @@ -5181,7 +5176,6 @@ mod common_parallel { #[test] #[cfg(target_arch = "x86_64")] - #[cfg(not(feature = "mshv"))] // Test both vCPU and memory resizing together fn test_resize() { let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string()); @@ -5809,7 +5803,6 @@ mod common_parallel { } #[test] - #[cfg(not(feature = "mshv"))] fn test_virtio_balloon_free_page_reporting() { let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string()); let guest = Guest::new(Box::new(focal)); @@ -7887,7 +7880,6 @@ mod common_sequential { // through each ssh command. 
There's no need to perform a dedicated test to // verify the migration went well for virtio-net. #[test] - #[cfg(not(feature = "mshv"))] fn test_snapshot_restore_hotplug_virtiomem() { _test_snapshot_restore(true); } @@ -11164,25 +11156,21 @@ mod live_migration { } #[test] - #[cfg(not(feature = "mshv"))] fn test_live_migration_numa() { _test_live_migration_numa(false, false) } #[test] - #[cfg(not(feature = "mshv"))] fn test_live_migration_numa_local() { _test_live_migration_numa(false, true) } #[test] - #[cfg(not(feature = "mshv"))] fn test_live_upgrade_numa() { _test_live_migration_numa(true, false) } #[test] - #[cfg(not(feature = "mshv"))] fn test_live_upgrade_numa_local() { _test_live_migration_numa(true, true) } From c7eac0f65a83b9cd4fdc9d3edb918f9efcbe0b34 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Sep 2025 23:01:53 +0000 Subject: [PATCH 160/294] build: Bump clap from 4.5.13 to 4.5.47 Bumps [clap](https://github.com/clap-rs/clap) from 4.5.13 to 4.5.47. - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.13...clap_complete-v4.5.47) --- updated-dependencies: - dependency-name: clap dependency-version: 4.5.47 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 25 ++++++++----------------- Cargo.toml | 2 +- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0b896638f4..61bc88a524 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -371,18 +371,18 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "clap" -version = "4.5.13" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" +checksum = "7eac00902d9d136acd712710d71823fb8ac8004ca445a89e73a41d45aa712931" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.13" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" +checksum = "2ad9bbf750e73b5884fb8a211a9424a1906c1e156724260fdae972f31d70e1d6" dependencies = [ "anstream", "anstyle", @@ -2047,12 +2047,12 @@ dependencies = [ [[package]] name = "terminal_size" -version = "0.3.0" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0" dependencies = [ - "rustix 0.38.44", - "windows-sys 0.48.0", + "rustix 1.0.7", + "windows-sys 0.60.2", ] [[package]] @@ -2617,15 +2617,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", -] - [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index 74cdbbeb17..d216a795ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -139,7 +139,7 @@ anyhow = "1.0.98" bitflags = "2.9.3" 
byteorder = "1.5.0" cfg-if = "1.0.0" -clap = "4.5.13" +clap = "4.5.47" dhat = "0.3.3" dirs = "6.0.0" env_logger = "0.11.8" From 533d3a85d13af3c8d86398b81526266db070f062 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Mon, 8 Sep 2025 17:41:11 +0200 Subject: [PATCH 161/294] vmm: fix landlock on aarch64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch::aarch64::fdt::create_cpu_nodes will always look at this if it exists. (If it doesn't exist, this is a no-op — add_rule_with_access() won't add rules for paths that don't exist.) Fixes: b3e5738b4 ("vmm: Introduce ApplyLandlock trait") Signed-off-by: Alyssa Ross --- vmm/src/vm_config.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index aff0d95f35..96269bfb27 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -960,6 +960,11 @@ impl VmConfig { pub(crate) fn apply_landlock(&self) -> LandlockResult<()> { let mut landlock = Landlock::new()?; + #[cfg(target_arch = "aarch64")] + { + landlock.add_rule_with_access(Path::new("/sys/devices/system/cpu/cpu0/cache"), "r")?; + } + if let Some(mem_zones) = &self.memory.zones { for zone in mem_zones.iter() { zone.apply_landlock(&mut landlock)?; From 6cac99bd76dc048493b0085c85695639c31cff35 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Tue, 9 Sep 2025 10:53:10 +0200 Subject: [PATCH 162/294] vmm: error if landlock can't be enabled Since the user has to explicitly ask for Landlock to be enabled in Cloud Hypervisor, it's surprising that, even when they've done that, Landlock will silently not be enabled if the kernel doesn't support it. To prevent accidental absence of a desired security protection, exit with an error if Landlock, or the one feature of it we use in Cloud Hypervisor (file access) is not supported. Signed-off-by: Alyssa Ross --- vmm/src/landlock.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vmm/src/landlock.rs b/vmm/src/landlock.rs index 5f139e5c1c..b7fbf6edb7 100644 --- a/vmm/src/landlock.rs +++ b/vmm/src/landlock.rs @@ -9,8 +9,8 @@ use std::path::Path; #[cfg(test)] use landlock::make_bitflags; use landlock::{ - path_beneath_rules, Access, AccessFs, BitFlags, Ruleset, RulesetAttr, RulesetCreated, - RulesetCreatedAttr, RulesetError, ABI, + path_beneath_rules, Access, AccessFs, BitFlags, Compatible, Ruleset, RulesetAttr, + RulesetCreated, RulesetCreatedAttr, RulesetError, ABI, }; use thiserror::Error; @@ -75,8 +75,10 @@ impl Landlock { let file_access = AccessFs::from_all(ABI); let def_ruleset = Ruleset::default() + .set_compatibility(landlock::CompatLevel::HardRequirement) .handle_access(file_access) - .map_err(LandlockError::ManageRuleset)?; + .map_err(LandlockError::ManageRuleset)? + .set_compatibility(landlock::CompatLevel::HardRequirement); // By default, rulesets are created in `BestEffort` mode. This lets Landlock // to enable all the supported rules and silently ignore the unsupported ones. From 305bec056fa7d2bcb9632d63021cc7a102b0de0c Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Tue, 9 Sep 2025 16:22:28 +0200 Subject: [PATCH 163/294] arch: aarch64: fdt: replace broken link The previously linked file was converted to ReStructuredText, then later removed from the kernel, with part of the justification being "Most of what booting-without-of.rst contains is now in the DT specification", so point to that instead. 
Signed-off-by: Alyssa Ross --- arch/src/aarch64/fdt.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/src/aarch64/fdt.rs b/arch/src/aarch64/fdt.rs index aefecf8b2d..1669f980ba 100644 --- a/arch/src/aarch64/fdt.rs +++ b/arch/src/aarch64/fdt.rs @@ -231,8 +231,8 @@ pub fn create_fdt Date: Tue, 9 Sep 2025 15:52:40 +0200 Subject: [PATCH 164/294] seccomp: avoid hardcoding ioctl numbers These can differ between platforms, so it's better to use centralized definitions of them. We can't currently do this for the KVM and VFIO ioctls, because the corresponding crates don't publicly expose the ioctl numbers. Signed-off-by: Alyssa Ross --- Cargo.lock | 1 + virtio-devices/src/seccomp_filters.rs | 14 +- vmm/Cargo.toml | 1 + vmm/src/seccomp_filters.rs | 210 ++++++++++++-------------- 4 files changed, 100 insertions(+), 126 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 61bc88a524..82933af3ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2489,6 +2489,7 @@ dependencies = [ "uuid", "vfio-ioctls", "vfio_user", + "vhost", "virtio-bindings", "virtio-devices", "vm-allocator", diff --git a/virtio-devices/src/seccomp_filters.rs b/virtio-devices/src/seccomp_filters.rs index 2579e4f142..abad842bb1 100644 --- a/virtio-devices/src/seccomp_filters.rs +++ b/virtio-devices/src/seccomp_filters.rs @@ -4,6 +4,7 @@ // // SPDX-License-Identifier: Apache-2.0 +use libc::{FIONBIO, TIOCGWINSZ, TUNSETOFFLOAD}; use seccompiler::SeccompCmpOp::Eq; use seccompiler::{ BpfProgram, Error, SeccompAction, SeccompCmpArgLen as ArgLen, SeccompCondition as Cond, @@ -46,17 +47,10 @@ macro_rules! or { ($($x:expr),*) => (vec![$($x),*]) } -// See include/uapi/asm-generic/ioctls.h in the kernel code. -const TIOCGWINSZ: u64 = 0x5413; -const FIONBIO: u64 = 0x5421; - // See include/uapi/linux/vfio.h in the kernel code. const VFIO_IOMMU_MAP_DMA: u64 = 0x3b71; const VFIO_IOMMU_UNMAP_DMA: u64 = 0x3b72; -// See include/uapi/linux/if_tun.h in the kernel code. 
-const TUNSETOFFLOAD: u64 = 0x4004_54d0; - #[cfg(feature = "sev_snp")] fn mshv_sev_snp_ioctl_seccomp_rule() -> SeccompRule { and![Cond::new( @@ -75,7 +69,7 @@ fn create_mshv_sev_snp_ioctl_seccomp_rule() -> Vec { fn create_virtio_console_ioctl_seccomp_rule() -> Vec { or![ - and![Cond::new(1, ArgLen::Dword, Eq, TIOCGWINSZ).unwrap()], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCGWINSZ as _).unwrap()], #[cfg(feature = "sev_snp")] mshv_sev_snp_ioctl_seccomp_rule(), ] @@ -157,7 +151,7 @@ fn virtio_net_thread_rules() -> Vec<(i64, Vec)> { fn create_virtio_net_ctl_ioctl_seccomp_rule() -> Vec { or![ - and![Cond::new(1, ArgLen::Dword, Eq, TUNSETOFFLOAD).unwrap()], + and![Cond::new(1, ArgLen::Dword, Eq, TUNSETOFFLOAD as _).unwrap()], #[cfg(feature = "sev_snp")] mshv_sev_snp_ioctl_seccomp_rule(), ] @@ -231,7 +225,7 @@ fn virtio_vhost_block_thread_rules() -> Vec<(i64, Vec)> { fn create_vsock_ioctl_seccomp_rule() -> Vec { or![ - and![Cond::new(1, ArgLen::Dword, Eq, FIONBIO,).unwrap()], + and![Cond::new(1, ArgLen::Dword, Eq, FIONBIO as _).unwrap()], #[cfg(feature = "sev_snp")] mshv_sev_snp_ioctl_seccomp_rule(), ] diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 6ebabc01ae..7793357e2d 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -78,6 +78,7 @@ tracer = { path = "../tracer" } uuid = { workspace = true } vfio-ioctls = { workspace = true, default-features = false } vfio_user = { workspace = true } +vhost = { workspace = true } virtio-bindings = { workspace = true } virtio-devices = { path = "../virtio-devices" } vm-allocator = { path = "../vm-allocator" } diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index 46e38f9c8e..3dacd30463 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -5,11 +5,25 @@ // SPDX-License-Identifier: Apache-2.0 use hypervisor::HypervisorType; +use libc::{ + BLKIOMIN, BLKIOOPT, BLKPBSZGET, BLKSSZGET, FIOCLEX, FIONBIO, SIOCGIFFLAGS, SIOCGIFHWADDR, + SIOCGIFINDEX, SIOCGIFMTU, SIOCSIFADDR, SIOCSIFFLAGS, SIOCSIFHWADDR, SIOCSIFMTU, SIOCSIFNETMASK, + TCGETS, TCGETS2, TCSETS, TCSETS2, TIOCGPGRP, TIOCGPTPEER, TIOCGWINSZ, TIOCSCTTY, TIOCSPGRP, + TIOCSPTLCK, TUNGETFEATURES, TUNGETIFF, TUNSETIFF, TUNSETOFFLOAD, TUNSETVNETHDRSZ, +}; use seccompiler::SeccompCmpOp::Eq; use seccompiler::{ BackendError, BpfProgram, Error, SeccompAction, SeccompCmpArgLen as ArgLen, SeccompCondition as Cond, SeccompFilter, SeccompRule, }; +use vhost::vhost_kern::vhost_binding::{ + VHOST_GET_BACKEND_FEATURES, VHOST_GET_FEATURES, VHOST_SET_BACKEND_FEATURES, VHOST_SET_FEATURES, + VHOST_SET_OWNER, VHOST_SET_VRING_ADDR, VHOST_SET_VRING_BASE, VHOST_SET_VRING_CALL, + VHOST_SET_VRING_KICK, VHOST_SET_VRING_NUM, VHOST_VDPA_GET_CONFIG, VHOST_VDPA_GET_CONFIG_SIZE, + VHOST_VDPA_GET_DEVICE_ID, VHOST_VDPA_GET_IOVA_RANGE, VHOST_VDPA_GET_STATUS, + VHOST_VDPA_GET_VRING_NUM, VHOST_VDPA_SET_CONFIG, VHOST_VDPA_SET_CONFIG_CALL, + VHOST_VDPA_SET_STATUS, VHOST_VDPA_SET_VRING_ENABLE, VHOST_VDPA_SUSPEND, +}; pub enum Thread { HttpApi, @@ -40,44 +54,6 @@ macro_rules! or { ($($x:expr),*) => (vec![$($x),*]) } -// See include/uapi/asm-generic/ioctls.h in the kernel code. 
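// Editorial aside, not part of the diff: how one of the literal request
// numbers being removed below, TCGETS2 = 0x802c_542a, decomposes. Linux packs
// ioctl numbers as direction | size | type | nr, and the direction and size
// fields are not laid out identically on every architecture, which is why the
// hardcoded values are replaced with libc's per-target definitions. The
// shift and direction values shown are the asm-generic (x86_64) ones.

const IOC_NRSHIFT: u64 = 0; // 8 bits: command number
const IOC_TYPESHIFT: u64 = 8; // 8 bits: "magic" type byte ('T' for termios)
const IOC_SIZESHIFT: u64 = 16; // 14 bits: payload size
const IOC_DIRSHIFT: u64 = 30; // 2 bits: direction
const IOC_READ: u64 = 2;

const fn ior(magic: u8, nr: u8, size: u64) -> u64 {
    (IOC_READ << IOC_DIRSHIFT)
        | (size << IOC_SIZESHIFT)
        | ((magic as u64) << IOC_TYPESHIFT)
        | ((nr as u64) << IOC_NRSHIFT)
}

// _IOR('T', 0x2a, struct termios2); struct termios2 is 44 (0x2c) bytes on
// x86_64, which yields 0x802c_542a.
const TCGETS2_X86_64: u64 = ior(b'T', 0x2a, 0x2c);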
-const TCGETS: u64 = 0x5401; -const TCGETS2: u64 = 0x802c_542a; -const TCSETS: u64 = 0x5402; -const TCSETS2: u64 = 0x402c_542b; -const TIOCSCTTY: u64 = 0x540E; -const TIOCGPGRP: u64 = 0x540F; -const TIOCSPGRP: u64 = 0x5410; -const TIOCGWINSZ: u64 = 0x5413; -const TIOCSPTLCK: u64 = 0x4004_5431; -const TIOCGPTPEER: u64 = 0x5441; -const FIOCLEX: u64 = 0x5451; -const FIONBIO: u64 = 0x5421; - -// See include/uapi/linux/fs.h in the kernel code. -const BLKSSZGET: u64 = 0x1268; -const BLKPBSZGET: u64 = 0x127b; -const BLKIOMIN: u64 = 0x1278; -const BLKIOOPT: u64 = 0x1279; - -// See include/uapi/linux/if_tun.h in the kernel code. -const TUNGETIFF: u64 = 0x8004_54d2; -const TUNSETIFF: u64 = 0x4004_54ca; -const TUNSETOFFLOAD: u64 = 0x4004_54d0; -const TUNSETVNETHDRSZ: u64 = 0x4004_54d8; -const TUNGETFEATURES: u64 = 0x8004_54cf; - -// See include/uapi/linux/sockios.h in the kernel code. -const SIOCGIFFLAGS: u64 = 0x8913; -const SIOCSIFFLAGS: u64 = 0x8914; -const SIOCSIFADDR: u64 = 0x8916; -const SIOCSIFNETMASK: u64 = 0x891c; -const SIOCGIFMTU: u64 = 0x8921; -const SIOCSIFMTU: u64 = 0x8922; -const SIOCSIFHWADDR: u64 = 0x8924; -const SIOCGIFHWADDR: u64 = 0x8927; -const SIOCGIFINDEX: u64 = 0x8933; - // See include/uapi/linux/vfio.h in the kernel code. const VFIO_GET_API_VERSION: u64 = 0x3b64; const VFIO_CHECK_EXTENSION: u64 = 0x3b65; @@ -95,29 +71,6 @@ const VFIO_IOMMU_MAP_DMA: u64 = 0x3b71; const VFIO_IOMMU_UNMAP_DMA: u64 = 0x3b72; const VFIO_DEVICE_IOEVENTFD: u64 = 0x3b74; -// See include/uapi/linux/vhost.h in the kernel code -const VHOST_GET_FEATURES: u64 = 0x8008af00; -const VHOST_SET_FEATURES: u64 = 0x4008af00; -const VHOST_SET_OWNER: u64 = 0xaf01; -const VHOST_SET_VRING_NUM: u64 = 0x4008af10; -const VHOST_SET_VRING_ADDR: u64 = 0x4028af11; -const VHOST_SET_VRING_BASE: u64 = 0x4008af12; -const VHOST_SET_VRING_KICK: u64 = 0x4008af20; -const VHOST_SET_VRING_CALL: u64 = 0x4008af21; -const VHOST_SET_BACKEND_FEATURES: u64 = 0x4008af25; -const VHOST_GET_BACKEND_FEATURES: u64 = 0x8008af26; -const VHOST_VDPA_GET_DEVICE_ID: u64 = 0x8004af70; -const VHOST_VDPA_GET_STATUS: u64 = 0x8001af71; -const VHOST_VDPA_SET_STATUS: u64 = 0x4001af72; -const VHOST_VDPA_GET_CONFIG: u64 = 0x8008af73; -const VHOST_VDPA_SET_CONFIG: u64 = 0x4008af74; -const VHOST_VDPA_SET_VRING_ENABLE: u64 = 0x4008af75; -const VHOST_VDPA_GET_VRING_NUM: u64 = 0x8002af76; -const VHOST_VDPA_SET_CONFIG_CALL: u64 = 0x4004af77; -const VHOST_VDPA_GET_IOVA_RANGE: u64 = 0x8010af78; -const VHOST_VDPA_GET_CONFIG_SIZE: u64 = 0x8004af79; -const VHOST_VDPA_SUSPEND: u64 = 0xaf7d; - // See include/uapi/linux/kvm.h in the kernel code. 
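// Editorial aside, not part of the diff: a self-contained sketch of how the
// allow-listed request numbers end up in an installed filter. Each rule
// matches ioctl's second argument (the request number) against one value, the
// rules are keyed on SYS_ioctl, and the whole set is compiled to BPF and
// applied. This mirrors the seccompiler usage already present in this file;
// the helper name and action choices are illustrative only.

use std::collections::BTreeMap;
use std::convert::TryInto;

use libc::{SYS_ioctl, TCGETS2, TCSETS2};
use seccompiler::{
    apply_filter, BpfProgram, SeccompAction, SeccompCmpArgLen, SeccompCmpOp, SeccompCondition,
    SeccompFilter, SeccompRule,
};

fn install_termios2_only_filter() -> Result<(), Box<dyn std::error::Error>> {
    let rules = vec![
        SeccompRule::new(vec![SeccompCondition::new(
            1, // argument index 1: the ioctl request number
            SeccompCmpArgLen::Dword,
            SeccompCmpOp::Eq,
            TCGETS2 as u64,
        )?])?,
        SeccompRule::new(vec![SeccompCondition::new(
            1,
            SeccompCmpArgLen::Dword,
            SeccompCmpOp::Eq,
            TCSETS2 as u64,
        )?])?,
    ];

    let mut filter = BTreeMap::new();
    filter.insert(SYS_ioctl, rules);

    // ioctl requests that match a rule are allowed; everything else traps.
    let bpf: BpfProgram = SeccompFilter::new(
        filter,
        SeccompAction::Trap,
        SeccompAction::Allow,
        std::env::consts::ARCH.try_into()?,
    )?
    .try_into()?;

    apply_filter(&bpf)?;
    Ok(())
}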
#[cfg(feature = "kvm")] mod kvm { @@ -297,12 +250,12 @@ fn create_vmm_ioctl_seccomp_rule_common( hypervisor_type: HypervisorType, ) -> Result, BackendError> { let mut common_rules = or![ - and![Cond::new(1, ArgLen::Dword, Eq, BLKSSZGET)?], - and![Cond::new(1, ArgLen::Dword, Eq, BLKPBSZGET)?], - and![Cond::new(1, ArgLen::Dword, Eq, BLKIOMIN)?], - and![Cond::new(1, ArgLen::Dword, Eq, BLKIOOPT)?], - and![Cond::new(1, ArgLen::Dword, Eq, FIOCLEX)?], - and![Cond::new(1, ArgLen::Dword, Eq, FIONBIO)?], + and![Cond::new(1, ArgLen::Dword, Eq, BLKSSZGET as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, BLKPBSZGET as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, BLKIOMIN as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, BLKIOOPT as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, FIOCLEX as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, FIONBIO as _)?], and![Cond::new(1, ArgLen::Dword, Eq, SIOCGIFFLAGS)?], and![Cond::new(1, ArgLen::Dword, Eq, SIOCGIFHWADDR)?], and![Cond::new(1, ArgLen::Dword, Eq, SIOCGIFMTU)?], @@ -312,21 +265,21 @@ fn create_vmm_ioctl_seccomp_rule_common( and![Cond::new(1, ArgLen::Dword, Eq, SIOCSIFHWADDR)?], and![Cond::new(1, ArgLen::Dword, Eq, SIOCSIFMTU)?], and![Cond::new(1, ArgLen::Dword, Eq, SIOCSIFNETMASK)?], - and![Cond::new(1, ArgLen::Dword, Eq, TCSETS)?], - and![Cond::new(1, ArgLen::Dword, Eq, TCSETS2)?], - and![Cond::new(1, ArgLen::Dword, Eq, TCGETS)?], - and![Cond::new(1, ArgLen::Dword, Eq, TCGETS2)?], - and![Cond::new(1, ArgLen::Dword, Eq, TIOCGPGRP)?], - and![Cond::new(1, ArgLen::Dword, Eq, TIOCGPTPEER)?], - and![Cond::new(1, ArgLen::Dword, Eq, TIOCGWINSZ)?], - and![Cond::new(1, ArgLen::Dword, Eq, TIOCSCTTY)?], - and![Cond::new(1, ArgLen::Dword, Eq, TIOCSPGRP)?], - and![Cond::new(1, ArgLen::Dword, Eq, TIOCSPTLCK)?], - and![Cond::new(1, ArgLen::Dword, Eq, TUNGETFEATURES)?], - and![Cond::new(1, ArgLen::Dword, Eq, TUNGETIFF)?], - and![Cond::new(1, ArgLen::Dword, Eq, TUNSETIFF)?], - and![Cond::new(1, ArgLen::Dword, Eq, TUNSETOFFLOAD)?], - and![Cond::new(1, ArgLen::Dword, Eq, TUNSETVNETHDRSZ)?], + and![Cond::new(1, ArgLen::Dword, Eq, TCSETS as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TCSETS2 as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TCGETS as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TCGETS2 as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCGPGRP as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCGPTPEER as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCGWINSZ as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCSCTTY as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCSPGRP as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCSPTLCK as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TUNGETFEATURES as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TUNGETIFF as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TUNSETIFF as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TUNSETOFFLOAD as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TUNSETVNETHDRSZ as _)?], and![Cond::new(1, ArgLen::Dword, Eq, VFIO_GET_API_VERSION)?], and![Cond::new(1, ArgLen::Dword, Eq, VFIO_CHECK_EXTENSION)?], and![Cond::new(1, ArgLen::Dword, Eq, VFIO_SET_IOMMU)?], @@ -347,32 +300,57 @@ fn create_vmm_ioctl_seccomp_rule_common( and![Cond::new(1, ArgLen::Dword, Eq, VFIO_IOMMU_MAP_DMA)?], and![Cond::new(1, ArgLen::Dword, Eq, VFIO_IOMMU_UNMAP_DMA)?], and![Cond::new(1, ArgLen::Dword, Eq, VFIO_DEVICE_IOEVENTFD)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_GET_FEATURES)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_FEATURES)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_OWNER)?], - and![Cond::new(1, 
ArgLen::Dword, Eq, VHOST_SET_VRING_NUM)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_VRING_ADDR)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_VRING_BASE)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_VRING_KICK)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_VRING_CALL)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_BACKEND_FEATURES)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_GET_BACKEND_FEATURES)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_GET_DEVICE_ID)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_GET_STATUS)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_SET_STATUS)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_GET_CONFIG)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_SET_CONFIG)?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_GET_FEATURES())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_FEATURES())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_OWNER())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_VRING_NUM())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_VRING_ADDR())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_VRING_BASE())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_VRING_KICK())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_SET_VRING_CALL())?], + and![Cond::new( + 1, + ArgLen::Dword, + Eq, + VHOST_SET_BACKEND_FEATURES() + )?], + and![Cond::new( + 1, + ArgLen::Dword, + Eq, + VHOST_GET_BACKEND_FEATURES() + )?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_GET_DEVICE_ID())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_GET_STATUS())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_SET_STATUS())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_GET_CONFIG())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_SET_CONFIG())?], + and![Cond::new( + 1, + ArgLen::Dword, + Eq, + VHOST_VDPA_SET_VRING_ENABLE(), + )?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_GET_VRING_NUM())?], + and![Cond::new( + 1, + ArgLen::Dword, + Eq, + VHOST_VDPA_SET_CONFIG_CALL() + )?], + and![Cond::new( + 1, + ArgLen::Dword, + Eq, + VHOST_VDPA_GET_IOVA_RANGE() + )?], and![Cond::new( 1, ArgLen::Dword, Eq, - VHOST_VDPA_SET_VRING_ENABLE + VHOST_VDPA_GET_CONFIG_SIZE() )?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_GET_VRING_NUM)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_SET_CONFIG_CALL)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_GET_IOVA_RANGE)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_GET_CONFIG_SIZE)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_SUSPEND)?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_SUSPEND())?], ]; let hypervisor_rules = create_vmm_ioctl_seccomp_rule_hypervisor(hypervisor_type)?; @@ -485,16 +463,16 @@ fn create_vmm_ioctl_seccomp_rule( } fn create_api_ioctl_seccomp_rule() -> Result, BackendError> { - Ok(or![and![Cond::new(1, ArgLen::Dword, Eq, FIONBIO)?]]) + Ok(or![and![Cond::new(1, ArgLen::Dword, Eq, FIONBIO as _)?]]) } fn create_signal_handler_ioctl_seccomp_rule() -> Result, BackendError> { Ok(or![ - and![Cond::new(1, ArgLen::Dword, Eq, TCGETS)?], - and![Cond::new(1, ArgLen::Dword, Eq, TCGETS2)?], - and![Cond::new(1, ArgLen::Dword, Eq, TCSETS)?], - and![Cond::new(1, ArgLen::Dword, Eq, TCSETS2)?], - and![Cond::new(1, ArgLen::Dword, Eq, TIOCGWINSZ)?], + and![Cond::new(1, ArgLen::Dword, Eq, TCGETS as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TCGETS2 as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TCSETS as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TCSETS2 as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, 
TIOCGWINSZ as _)?], ]) } @@ -526,9 +504,9 @@ fn signal_handler_thread_rules() -> Result)>, Backend fn create_pty_foreground_ioctl_seccomp_rule() -> Result, BackendError> { Ok(or![ - and![Cond::new(1, ArgLen::Dword, Eq, TIOCGPGRP)?], - and![Cond::new(1, ArgLen::Dword, Eq, TIOCSCTTY)?], - and![Cond::new(1, ArgLen::Dword, Eq, TIOCSPGRP)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCGPGRP as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCSCTTY as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, TIOCSPGRP as _)?], ]) } @@ -773,14 +751,14 @@ fn create_vcpu_ioctl_seccomp_rule( and![Cond::new(1, ArgLen::Dword, Eq, VFIO_GROUP_UNSET_CONTAINER)?], and![Cond::new(1, ArgLen::Dword, Eq, VFIO_IOMMU_MAP_DMA)?], and![Cond::new(1, ArgLen::Dword, Eq, VFIO_IOMMU_UNMAP_DMA)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_SET_STATUS)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_GET_CONFIG)?], - and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_SET_CONFIG)?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_SET_STATUS())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_GET_CONFIG())?], + and![Cond::new(1, ArgLen::Dword, Eq, VHOST_VDPA_SET_CONFIG())?], and![Cond::new( 1, ArgLen::Dword, Eq, - VHOST_VDPA_SET_VRING_ENABLE + VHOST_VDPA_SET_VRING_ENABLE(), )?], ]; From e4af58b88eeb6cb8444d949a3ef61e9f4d5bf9b3 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Tue, 9 Sep 2025 16:01:56 +0200 Subject: [PATCH 165/294] arch: aarch64: fdt: reduce log verbosity This module warns for a lot of routine things that the user cannot do anything about. Closes: https://github.com/cloud-hypervisor/cloud-hypervisor/issues/7220 Signed-off-by: Alyssa Ross --- arch/src/aarch64/fdt.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/arch/src/aarch64/fdt.rs b/arch/src/aarch64/fdt.rs index 1669f980ba..e6b9722270 100644 --- a/arch/src/aarch64/fdt.rs +++ b/arch/src/aarch64/fdt.rs @@ -110,11 +110,8 @@ pub fn get_cache_size(cache_level: CacheLevel) -> u32 { let file_path = Path::new(&file_directory); if !file_path.exists() { - warn!("File: {} does not exist.", file_directory); 0 } else { - info!("File: {} exist.", file_directory); - let src = fs::read_to_string(file_directory).expect("File not exists or file corrupted."); // The content of the file is as simple as a size, like: "32K" let src = src.trim(); @@ -144,11 +141,8 @@ pub fn get_cache_coherency_line_size(cache_level: CacheLevel) -> u32 { let file_path = Path::new(&file_directory); if !file_path.exists() { - warn!("File: {} does not exist.", file_directory); 0 } else { - info!("File: {} exist.", file_directory); - let src = fs::read_to_string(file_directory).expect("File not exists or file corrupted."); src.trim().parse::().unwrap() } @@ -167,11 +161,8 @@ pub fn get_cache_number_of_sets(cache_level: CacheLevel) -> u32 { let file_path = Path::new(&file_directory); if !file_path.exists() { - warn!("File: {} does not exist.", file_directory); 0 } else { - info!("File: {} exist.", file_directory); - let src = fs::read_to_string(file_directory).expect("File not exists or file corrupted."); src.trim().parse::().unwrap() } @@ -195,11 +186,8 @@ pub fn get_cache_shared(cache_level: CacheLevel) -> bool { let file_path = Path::new(&file_directory); if !file_path.exists() { - warn!("File: {} does not exist.", file_directory); result = false; } else { - info!("File: {} exist.", file_directory); - let src = fs::read_to_string(file_directory).expect("File not exists or file corrupted."); let src = src.trim(); if src.is_empty() { @@ -325,7 +313,6 @@ fn create_cpu_nodes( if 
!cache_exist { warn!("cache sysfs system does not exist."); } else { - info!("cache sysfs system exists."); // L1 Data Cache Info. l1_d_cache_size = get_cache_size(CacheLevel::L1D); l1_d_cache_line_size = get_cache_coherency_line_size(CacheLevel::L1D); @@ -426,9 +413,6 @@ fn create_cpu_nodes( fdt.end_node(l2_cache_node)?; } - if l2_cache_size != 0 && l2_cache_shared { - warn!("L2 cache shared with other cpus"); - } } fdt.end_node(cpu_node)?; From 5737e58f293a8c50046bf9f9efc38c95e487d28f Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Tue, 9 Sep 2025 18:24:01 +0000 Subject: [PATCH 166/294] tests: Use serial console in test_cpu_hotplug Virtio console is activated much later in boot. The output it spits out lacks the initial CPU configuration of the guest. Signed-off-by: Wei Liu --- tests/integration.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/integration.rs b/tests/integration.rs index 45662c786c..255c27adf1 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -4939,6 +4939,7 @@ mod common_parallel { let focal = UbuntuDiskConfig::new(FOCAL_IMAGE_NAME.to_string()); let guest = Guest::new(Box::new(focal)); let api_socket = temp_api_path(&guest.tmp_dir); + let console_str = "console=ttyS0"; let kernel_path = direct_kernel_boot_path(); @@ -4946,7 +4947,14 @@ mod common_parallel { .args(["--cpus", "boot=2,max=4"]) .args(["--memory", "size=512M"]) .args(["--kernel", kernel_path.to_str().unwrap()]) - .args(["--cmdline", DIRECT_KERNEL_BOOT_CMDLINE]) + .args([ + "--cmdline", + DIRECT_KERNEL_BOOT_CMDLINE + .replace("console=hvc0 ", console_str) + .as_str(), + ]) + .args(["--serial", "tty"]) + .args(["--console", "off"]) .default_disks() .default_net() .args(["--api-socket", &api_socket]) From 86c736fe76136f0ea97bf330796635a8d3bc6785 Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Tue, 9 Sep 2025 17:35:27 +0000 Subject: [PATCH 167/294] build: bump mshv and vfio crates Bump mshv-ioctls and mshv-bindings to 0.6.0. Most notably, this version contains fixes and new bindings for arm64 guests. Bump the vfio crates too so that they point to the latest mshv crates. 
Signed-off-by: Anirudh Rayabharam --- Cargo.lock | 20 +++++++-------- Cargo.toml | 10 ++++---- fuzz/Cargo.lock | 68 ++++++++++++++++++++++++------------------------- fuzz/Cargo.toml | 2 +- 4 files changed, 50 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 82933af3ec..0111343fa8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1225,9 +1225,9 @@ checksum = "c505b3e17ed6b70a7ed2e67fbb2c560ee327353556120d6e72f5232b6880d536" [[package]] name = "mshv-bindings" -version = "0.5.2" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07f94f542c738f19317363222a7f415588c04cda964882479af41948ac3c3647" +checksum = "805cf329582f770f62cc612716a04c14815276ae266b6298375a672d3c5a5184" dependencies = [ "libc", "num_enum", @@ -1239,9 +1239,9 @@ dependencies = [ [[package]] name = "mshv-ioctls" -version = "0.5.2" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a6df0848f14eb69505a28673f94acdd830cf248fb57022b21f24e242b702e66" +checksum = "aefaab4c067cf5226a917227640d835327b25b71a8d465f815f74f490344e10a" dependencies = [ "libc", "mshv-bindings", @@ -2228,18 +2228,18 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "vfio-bindings" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b565663f62e091ca47db9a674c8c95c9686a000e82970f391a3cacf6470ff060" +checksum = "698c66a4522a31ab407a410a59c9660da036178e4fe3f371825cd6aad7d46837" dependencies = [ "vmm-sys-util", ] [[package]] name = "vfio-ioctls" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61316b5e308faa8ed4a87c4130256f765e46de3442eb2e2e619840ef73456738" +checksum = "7af7e8d49719333e5eb52209417f26695c9ab2b117a82596a63a44947f97c5d6" dependencies = [ "byteorder", "kvm-bindings", @@ -2256,9 +2256,9 @@ dependencies = [ [[package]] name = "vfio_user" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed81c5ed8224d468a322e923777ed0615cad433fe61177126098af995f89cecf" +checksum = "b8db5bc783aad75202ad4cbcdc5e893cff1dd8fa24a1bcdb4de8998d3c4d169a" dependencies = [ "bitflags 2.9.3", "libc", diff --git a/Cargo.toml b/Cargo.toml index d216a795ba..5a0d56a439 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -110,12 +110,12 @@ kvm-bindings = "0.12.0" kvm-ioctls = "0.22.0" # TODO: update to 0.13.1+ linux-loader = { git = "https://github.com/rust-vmm/linux-loader", branch = "main" } -mshv-bindings = "0.5.2" -mshv-ioctls = "0.5.2" +mshv-bindings = "0.6.0" +mshv-ioctls = "0.6.0" seccompiler = "0.5.0" -vfio-bindings = { version = "0.5.0", default-features = false } -vfio-ioctls = { version = "0.5.0", default-features = false } -vfio_user = { version = "0.1.0", default-features = false } +vfio-bindings = { version = "0.6.0", default-features = false } +vfio-ioctls = { version = "0.5.1", default-features = false } +vfio_user = { version = "0.1.1", default-features = false } vhost = { version = "0.14.0", default-features = false } vhost-user-backend = { version = "0.20.0", default-features = false } virtio-bindings = "0.2.6" diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index f6692014dc..1750a54b58 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -121,9 +121,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.0" +version = "2.9.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" [[package]] name = "block" @@ -176,18 +176,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.5.13" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" +checksum = "7eac00902d9d136acd712710d71823fb8ac8004ca445a89e73a41d45aa712931" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.13" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" +checksum = "2ad9bbf750e73b5884fb8a211a9424a1906c1e156724260fdae972f31d70e1d6" dependencies = [ "anstream", "anstyle", @@ -197,9 +197,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.2" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "cloud-hypervisor-fuzz" @@ -300,7 +300,7 @@ dependencies = [ "acpi_tables", "anyhow", "arch", - "bitflags 2.9.0", + "bitflags 2.9.4", "byteorder", "event_monitor", "hypervisor", @@ -344,7 +344,7 @@ version = "4.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74351c3392ea1ff6cd2628e0042d268ac2371cb613252ff383b6dfa50d22fa79" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.4", "libc", ] @@ -406,7 +406,7 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d66e32caf5dd59f561be0143e413e01d651bd8498eb9aa0be8c482c81c8d31" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.4", "cfg-if", "log", "managed", @@ -416,9 +416,9 @@ dependencies = [ [[package]] name = "gdbstub_arch" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "328a9e9425db13770d0d11de6332a608854266e44c53d12776be7b4aa427e3de" +checksum = "22dde0e1b68787036ccedd0b1ff6f953527a0e807e571fbe898975203027278f" dependencies = [ "gdbstub", "num-traits", @@ -555,7 +555,7 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b702df98508cb63ad89dd9beb9f6409761b30edca10d48e57941d3f11513a006" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.4", "kvm-bindings", "libc", "vmm-sys-util", @@ -598,8 +598,7 @@ dependencies = [ [[package]] name = "linux-loader" version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "870c3814345f050991f99869417779f6062542bcf4ed81db7a1b926ad1306638" +source = "git+https://github.com/rust-vmm/linux-loader?branch=main#5fdaed87ddafc89d6abf0b50195a12d19133000d" dependencies = [ "vm-memory", ] @@ -643,9 +642,9 @@ dependencies = [ [[package]] name = "mshv-bindings" -version = "0.5.2" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07f94f542c738f19317363222a7f415588c04cda964882479af41948ac3c3647" +checksum = "805cf329582f770f62cc612716a04c14815276ae266b6298375a672d3c5a5184" dependencies = [ "libc", "num_enum", @@ -923,9 +922,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.143" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" dependencies = [ "itoa", "memchr", @@ -987,9 +986,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "spin" @@ -1110,9 +1109,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.17.0" +version = "1.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" +checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" dependencies = [ "getrandom 0.3.3", "js-sys", @@ -1122,18 +1121,18 @@ dependencies = [ [[package]] name = "vfio-bindings" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b565663f62e091ca47db9a674c8c95c9686a000e82970f391a3cacf6470ff060" +checksum = "698c66a4522a31ab407a410a59c9660da036178e4fe3f371825cd6aad7d46837" dependencies = [ "vmm-sys-util", ] [[package]] name = "vfio-ioctls" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61316b5e308faa8ed4a87c4130256f765e46de3442eb2e2e619840ef73456738" +checksum = "7af7e8d49719333e5eb52209417f26695c9ab2b117a82596a63a44947f97c5d6" dependencies = [ "byteorder", "kvm-bindings", @@ -1148,11 +1147,11 @@ dependencies = [ [[package]] name = "vfio_user" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed81c5ed8224d468a322e923777ed0615cad433fe61177126098af995f89cecf" +checksum = "b8db5bc783aad75202ad4cbcdc5e893cff1dd8fa24a1bcdb4de8998d3c4d169a" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.4", "libc", "log", "serde", @@ -1170,7 +1169,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a4dcad85a129d97d5d4b2f3c47a4affdeedd76bdcd02094bcb5d9b76cac2d05" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.4", "libc", "uuid", "vm-memory", @@ -1242,6 +1241,7 @@ dependencies = [ "serde", "thiserror 2.0.12", "vfio-ioctls", + "vm-memory", "vmm-sys-util", ] @@ -1288,7 +1288,7 @@ dependencies = [ "acpi_tables", "anyhow", "arch", - "bitflags 2.9.0", + "bitflags 2.9.4", "block", "cfg-if", "clap", @@ -1524,7 +1524,7 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.4", ] [[package]] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index f8fa6fa4b7..ccdad23241 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -30,7 +30,7 @@ linux-loader = { git = "https://github.com/rust-vmm/linux-loader", branch = "mai "pe", ] } micro_http = { git = "https://github.com/firecracker-microvm/micro-http", branch = "main" } -mshv-bindings = "0.5.2" +mshv-bindings = "0.6.0" net_util = { path = "../net_util" } seccompiler = "0.5.0" virtio-devices = { path = "../virtio-devices" } From 5790bcefee4419e597a6cd8c544d80cb0554f6a9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Sep 2025 23:01:48 +0000 Subject: [PATCH 168/294] build: Bump miniz_oxide from 0.8.8 to 0.8.9 Bumps [miniz_oxide](https://github.com/Frommi/miniz_oxide) from 0.8.8 to 0.8.9. - [Changelog](https://github.com/Frommi/miniz_oxide/blob/master/CHANGELOG.md) - [Commits](https://github.com/Frommi/miniz_oxide/commits) --- updated-dependencies: - dependency-name: miniz_oxide dependency-version: 0.8.9 dependency-type: indirect update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0111343fa8..0ee00fa852 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1210,9 +1210,9 @@ dependencies = [ [[package]] name = "miniz_oxide" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", ] From 061351d82d15fe92ce84790c4881f85fbef9d094 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 15 Aug 2025 13:54:48 +0200 Subject: [PATCH 169/294] build: upgrade whole* workspace to Rust edition 2024 This upgrades the Cargo workspace to Rust edition 2024 to keep the code base clean and up to date. The commit only contains the adjustments to the Cargo.toml files and basic compiler error fixes. Also, this commit includes new SAFETY comments as discussed in [1]. The changes were not automatically fixed by `cargo fix --edition` but needed manual adjustments. Apart from that, all formatting and clippy adjustments follow in subsequent commits. * As only exception, workspace member net_gen sticks to edition 2021 for now as discussed in [0]. 
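For illustration, the pattern behind these manual adjustments is the edition 2024 `unsafe_op_in_unsafe_fn` lint, which warns by default when an unsafe operation is used inside an `unsafe fn` without an explicit `unsafe` block; each new block then also carries a SAFETY comment, as this commit does. A minimal, self-contained sketch (an assumed example, not code taken from this series):

// Illustrative only: edition 2024 no longer treats the body of an
// `unsafe fn` as one implicit unsafe block, so every unsafe operation
// is wrapped explicitly and documented with a SAFETY comment.
unsafe fn read_byte(ptr: *const u8) -> u8 {
    // Edition 2021 allowed a bare `*ptr` here; edition 2024 warns on it.
    // SAFETY: the caller guarantees `ptr` is non-null, aligned and readable.
    unsafe { *ptr }
}

fn main() {
    let x = 0x2au8;
    // SAFETY: `&x` yields a valid pointer to a live, initialized `u8`.
    let v = unsafe { read_byte(&x) };
    assert_eq!(v, 0x2a);
}
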
[0] https://github.com/cloud-hypervisor/cloud-hypervisor/pull/7295#discussion_r2310851041 [1] https://github.com/cloud-hypervisor/cloud-hypervisor/pull/7256#issuecomment-3271888674 Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- Cargo.toml | 3 ++- api_client/Cargo.toml | 2 +- arch/Cargo.toml | 2 +- block/Cargo.toml | 2 +- devices/Cargo.toml | 2 +- devices/src/pvmemcontrol.rs | 3 ++- event_monitor/Cargo.toml | 2 +- hypervisor/Cargo.toml | 2 +- net_gen/Cargo.toml | 1 + net_util/Cargo.toml | 2 +- net_util/src/tap.rs | 9 ++++++--- option_parser/Cargo.toml | 2 +- pci/Cargo.toml | 2 +- pci/src/vfio.rs | 6 +++--- performance-metrics/Cargo.toml | 2 +- rate_limiter/Cargo.toml | 2 +- serial_buffer/Cargo.toml | 2 +- test_infra/Cargo.toml | 2 +- tracer/Cargo.toml | 2 +- vhost_user_block/Cargo.toml | 2 +- vhost_user_net/Cargo.toml | 2 +- virtio-devices/Cargo.toml | 2 +- virtio-devices/src/iommu.rs | 6 +++--- vm-allocator/Cargo.toml | 2 +- vm-device/Cargo.toml | 2 +- vm-migration/Cargo.toml | 2 +- vm-virtio/Cargo.toml | 2 +- vmm/Cargo.toml | 2 +- vmm/src/clone3.rs | 3 ++- vmm/src/device_manager.rs | 2 +- vmm/src/sigwinch_listener.rs | 18 ++++++++++++------ 31 files changed, 54 insertions(+), 41 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5a0d56a439..9f78b3f8c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ authors = ["The Cloud Hypervisor Authors"] build = "build.rs" default-run = "cloud-hypervisor" description = "Open source Virtual Machine Monitor (VMM) that runs on top of KVM & MSHV" -edition = "2021" +edition = "2024" homepage = "https://github.com/cloud-hypervisor/cloud-hypervisor" license = "Apache-2.0 AND BSD-3-Clause" name = "cloud-hypervisor" @@ -102,6 +102,7 @@ members = [ "vm-virtio", "vmm", ] +package.edition = "2024" [workspace.dependencies] # rust-vmm crates diff --git a/api_client/Cargo.toml b/api_client/Cargo.toml index 630f1b4c44..429ecbf927 100644 --- a/api_client/Cargo.toml +++ b/api_client/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Cloud Hypervisor Authors"] -edition = "2021" +edition.workspace = true name = "api_client" version = "0.1.0" diff --git a/arch/Cargo.toml b/arch/Cargo.toml index 4739c14f36..3bd32affb1 100644 --- a/arch/Cargo.toml +++ b/arch/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Chromium OS Authors"] -edition = "2021" +edition.workspace = true name = "arch" version = "0.1.0" diff --git a/block/Cargo.toml b/block/Cargo.toml index 9e0505921e..db4ac9a6b3 100644 --- a/block/Cargo.toml +++ b/block/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Chromium OS Authors", "The Cloud Hypervisor Authors"] -edition = "2021" +edition.workspace = true name = "block" version = "0.1.0" diff --git a/devices/Cargo.toml b/devices/Cargo.toml index 2b985085f4..bf620eca90 100644 --- a/devices/Cargo.toml +++ b/devices/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Chromium OS Authors"] -edition = "2021" +edition.workspace = true name = "devices" version = "0.1.0" diff --git a/devices/src/pvmemcontrol.rs b/devices/src/pvmemcontrol.rs index d119a21a1a..ff2190933c 100644 --- a/devices/src/pvmemcontrol.rs +++ b/devices/src/pvmemcontrol.rs @@ -137,7 +137,8 @@ impl PvmemcontrolTransport { } unsafe fn as_register(self) -> PvmemcontrolTransportRegister { - self.payload.register + // SAFETY: We access initialized data. 
+ unsafe { self.payload.register } } } diff --git a/event_monitor/Cargo.toml b/event_monitor/Cargo.toml index af63335046..b2b7a4e48d 100644 --- a/event_monitor/Cargo.toml +++ b/event_monitor/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Cloud Hypervisor Authors"] -edition = "2021" +edition.workspace = true name = "event_monitor" version = "0.1.0" diff --git a/hypervisor/Cargo.toml b/hypervisor/Cargo.toml index bdfefdeefd..c756cf9fc1 100644 --- a/hypervisor/Cargo.toml +++ b/hypervisor/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["Microsoft Authors"] -edition = "2021" +edition.workspace = true license = "Apache-2.0 OR BSD-3-Clause" name = "hypervisor" version = "0.1.0" diff --git a/net_gen/Cargo.toml b/net_gen/Cargo.toml index c0edc11559..b1443c1f29 100644 --- a/net_gen/Cargo.toml +++ b/net_gen/Cargo.toml @@ -1,6 +1,7 @@ [package] authors = ["The Chromium OS Authors"] edition = "2021" +#edition.workspace = true name = "net_gen" version = "0.1.0" diff --git a/net_util/Cargo.toml b/net_util/Cargo.toml index 74ad342085..8f5df72225 100644 --- a/net_util/Cargo.toml +++ b/net_util/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Chromium OS Authors"] -edition = "2021" +edition.workspace = true name = "net_util" version = "0.1.0" diff --git a/net_util/src/tap.rs b/net_util/src/tap.rs index 533d6a45b2..591dbc4912 100644 --- a/net_util/src/tap.rs +++ b/net_util/src/tap.rs @@ -133,7 +133,8 @@ impl Tap { /// The caller should ensure to pass a valid file descriptor and valid /// arguments for the `ioctl()` syscall. unsafe fn ioctl_with_mut_ref(fd: &F, req: c_ulong, arg: &mut T) -> Result<()> { - let ret = ioctl_with_mut_ref(fd, req, arg); + // SAFETY: file descriptor is valid and return value is checked + let ret = unsafe { ioctl_with_mut_ref(fd, req, arg) }; if ret < 0 { return Err(Error::IoctlError(req, IoError::last_os_error())); } @@ -145,7 +146,8 @@ impl Tap { /// The caller should ensure to pass a valid file descriptor and valid /// arguments for the `ioctl()` syscall. unsafe fn ioctl_with_ref(fd: &F, req: c_ulong, arg: &T) -> Result<()> { - let ret = ioctl_with_ref(fd, req, arg); + // SAFETY: file descriptor is valid and return value is checked + let ret = unsafe { ioctl_with_ref(fd, req, arg) }; if ret < 0 { return Err(Error::IoctlError(req, IoError::last_os_error())); } @@ -157,7 +159,8 @@ impl Tap { /// The caller should ensure to pass a valid file descriptor and valid /// arguments for the `ioctl()` syscall. 
unsafe fn ioctl_with_val(fd: &F, req: c_ulong, arg: c_ulong) -> Result<()> { - let ret = ioctl_with_val(fd, req, arg); + // SAFETY: file descriptor is valid and return value is checked + let ret = unsafe { ioctl_with_val(fd, req, arg) }; if ret < 0 { return Err(Error::IoctlError(req, IoError::last_os_error())); } diff --git a/option_parser/Cargo.toml b/option_parser/Cargo.toml index 2b6d0fe110..abacf51ddd 100644 --- a/option_parser/Cargo.toml +++ b/option_parser/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Cloud Hypervisor Authors"] -edition = "2021" +edition.workspace = true name = "option_parser" version = "0.1.0" diff --git a/pci/Cargo.toml b/pci/Cargo.toml index 49618b194b..e1d631c348 100644 --- a/pci/Cargo.toml +++ b/pci/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["Samuel Ortiz "] -edition = "2021" +edition.workspace = true name = "pci" version = "0.1.0" diff --git a/pci/src/vfio.rs b/pci/src/vfio.rs index 660f27bd4e..42048b8f5f 100644 --- a/pci/src/vfio.rs +++ b/pci/src/vfio.rs @@ -190,7 +190,7 @@ pub(crate) struct Interrupt { impl Interrupt { fn update_msi(&mut self, offset: u64, data: &[u8]) -> Option { - if let Some(ref mut msi) = &mut self.msi { + if let Some(msi) = &mut self.msi { let action = msi.update(offset, data); return action; } @@ -199,7 +199,7 @@ impl Interrupt { } fn update_msix(&mut self, offset: u64, data: &[u8]) -> Option { - if let Some(ref mut msix) = &mut self.msix { + if let Some(msix) = &mut self.msix { let action = msix.update(offset, data); return action; } @@ -237,7 +237,7 @@ impl Interrupt { } fn msix_write_table(&mut self, offset: u64, data: &[u8]) { - if let Some(ref mut msix) = &mut self.msix { + if let Some(msix) = &mut self.msix { let offset = offset - u64::from(msix.cap.table_offset()); msix.bar.write_table(offset, data) } diff --git a/performance-metrics/Cargo.toml b/performance-metrics/Cargo.toml index 2403913767..00e6e74682 100644 --- a/performance-metrics/Cargo.toml +++ b/performance-metrics/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cloud Hypervisor Authors"] build = "../build.rs" -edition = "2021" +edition.workspace = true name = "performance-metrics" version = "0.1.0" diff --git a/rate_limiter/Cargo.toml b/rate_limiter/Cargo.toml index 6c6cdd650e..3067c695bb 100644 --- a/rate_limiter/Cargo.toml +++ b/rate_limiter/Cargo.toml @@ -1,5 +1,5 @@ [package] -edition = "2021" +edition.workspace = true name = "rate_limiter" version = "0.1.0" diff --git a/serial_buffer/Cargo.toml b/serial_buffer/Cargo.toml index 2c3993cff4..0691b8a3b7 100644 --- a/serial_buffer/Cargo.toml +++ b/serial_buffer/Cargo.toml @@ -1,5 +1,5 @@ [package] authors = ["The Cloud Hypervisor Authors"] -edition = "2021" +edition.workspace = true name = "serial_buffer" version = "0.1.0" diff --git a/test_infra/Cargo.toml b/test_infra/Cargo.toml index 37c6f38454..fe3cba8eea 100644 --- a/test_infra/Cargo.toml +++ b/test_infra/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Cloud Hypervisor Authors"] -edition = "2021" +edition.workspace = true name = "test_infra" version = "0.1.0" diff --git a/tracer/Cargo.toml b/tracer/Cargo.toml index 368520870f..bdcf559695 100644 --- a/tracer/Cargo.toml +++ b/tracer/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Cloud Hypervisor Authors"] -edition = "2021" +edition.workspace = true name = "tracer" version = "0.1.0" diff --git a/vhost_user_block/Cargo.toml b/vhost_user_block/Cargo.toml index ea1acf12a3..c2e7385668 100644 --- a/vhost_user_block/Cargo.toml +++ b/vhost_user_block/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = 
["The Cloud Hypervisor Authors"] build = "../build.rs" -edition = "2021" +edition.workspace = true name = "vhost_user_block" version = "0.1.0" diff --git a/vhost_user_net/Cargo.toml b/vhost_user_net/Cargo.toml index f84eae8d18..6cd316e9fe 100644 --- a/vhost_user_net/Cargo.toml +++ b/vhost_user_net/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cloud Hypervisor Authors"] build = "../build.rs" -edition = "2021" +edition.workspace = true name = "vhost_user_net" version = "0.1.0" diff --git a/virtio-devices/Cargo.toml b/virtio-devices/Cargo.toml index e13e98f759..64a60910ca 100644 --- a/virtio-devices/Cargo.toml +++ b/virtio-devices/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Cloud Hypervisor Authors"] -edition = "2021" +edition.workspace = true name = "virtio-devices" version = "0.1.0" diff --git a/virtio-devices/src/iommu.rs b/virtio-devices/src/iommu.rs index cdf9e59087..df310823af 100644 --- a/virtio-devices/src/iommu.rs +++ b/virtio-devices/src/iommu.rs @@ -489,7 +489,7 @@ impl Request { .write() .unwrap() .iter() - .filter(|(_, &d)| d == domain_id) + .filter(|&(_, &d)| d == domain_id) .map(|(&e, _)| e) .collect(); @@ -553,7 +553,7 @@ impl Request { .write() .unwrap() .iter() - .filter(|(_, &d)| d == domain_id) + .filter(|&(_, &d)| d == domain_id) .map(|(&e, _)| e) .collect(); @@ -669,7 +669,7 @@ fn detach_endpoint_from_domain( .write() .unwrap() .iter() - .filter(|(_, &d)| d == domain_id) + .filter(|&(_, &d)| d == domain_id) .count() == 0 { diff --git a/vm-allocator/Cargo.toml b/vm-allocator/Cargo.toml index 4f546058e1..e77e877917 100644 --- a/vm-allocator/Cargo.toml +++ b/vm-allocator/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Chromium OS Authors"] -edition = "2021" +edition.workspace = true name = "vm-allocator" version = "0.1.0" diff --git a/vm-device/Cargo.toml b/vm-device/Cargo.toml index ca1d38807d..80ed1489a3 100644 --- a/vm-device/Cargo.toml +++ b/vm-device/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Cloud Hypervisor Authors"] -edition = "2021" +edition.workspace = true name = "vm-device" version = "0.1.0" diff --git a/vm-migration/Cargo.toml b/vm-migration/Cargo.toml index de10ebdb33..7a8c9337b3 100644 --- a/vm-migration/Cargo.toml +++ b/vm-migration/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Cloud Hypervisor Authors"] -edition = "2021" +edition.workspace = true name = "vm-migration" version = "0.1.0" diff --git a/vm-virtio/Cargo.toml b/vm-virtio/Cargo.toml index b22a2f5551..5f195af492 100644 --- a/vm-virtio/Cargo.toml +++ b/vm-virtio/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Cloud Hypervisor Authors"] -edition = "2021" +edition.workspace = true name = "vm-virtio" version = "0.1.0" diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 7793357e2d..b931193a19 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["The Cloud Hypervisor Authors"] -edition = "2021" +edition.workspace = true name = "vmm" version = "0.1.0" diff --git a/vmm/src/clone3.rs b/vmm/src/clone3.rs index 0ab08126e1..fc273680f8 100644 --- a/vmm/src/clone3.rs +++ b/vmm/src/clone3.rs @@ -33,5 +33,6 @@ pub struct clone_args { /// - On error: `-1` and `errno` is set #[must_use] pub unsafe fn clone3(args: &mut clone_args, size: size_t) -> c_long { - syscall(SYS_clone3, args, size) + // SAFETY: parameters are assumed to be valid + unsafe { syscall(SYS_clone3, args, size) } } diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 0ad2ab7ca4..256047e877 100644 --- a/vmm/src/device_manager.rs +++ 
b/vmm/src/device_manager.rs @@ -3373,7 +3373,7 @@ impl DeviceManager { let mut devices = Vec::new(); let mut vsock = self.config.lock().unwrap().vsock.clone(); - if let Some(ref mut vsock_cfg) = &mut vsock { + if let Some(vsock_cfg) = &mut vsock { devices.push(self.make_virtio_vsock_device(vsock_cfg)?); } self.config.lock().unwrap().vsock = vsock; diff --git a/vmm/src/sigwinch_listener.rs b/vmm/src/sigwinch_listener.rs index 4d7aebdca3..104a680a63 100644 --- a/vmm/src/sigwinch_listener.rs +++ b/vmm/src/sigwinch_listener.rs @@ -83,7 +83,8 @@ unsafe fn close_fds_fallback(keep_fds: &BTreeSet) { .collect(); for fd in open_fds.difference(keep_fds) { - close(*fd); + // SAFETY: The FD is valid + unsafe { close(*fd) }; } } @@ -108,12 +109,14 @@ unsafe fn close_unused_fds(keep_fds: &mut [RawFd]) { continue; } - if syscall(SYS_close_range, first, last, 0) == -1 { + // SAFETY: FDs are valid + if unsafe { syscall(SYS_close_range, first, last, 0) } == -1 { // The kernel might be too old to have close_range, in // which case we need to fall back to an uglier method. let e = io::Error::last_os_error(); if e.raw_os_error() == Some(ENOSYS) { - return close_fds_fallback(&keep_fds.iter().copied().collect()); + // SAFETY: FDs are valid + return unsafe { close_fds_fallback(&keep_fds.iter().copied().collect()) }; } panic!("close_range: {e}"); @@ -212,7 +215,8 @@ unsafe fn clone_clear_sighand() -> io::Result { ..Default::default() }; args.flags |= CLONE_CLEAR_SIGHAND; - let r = clone3(&mut args, size_of::()); + // SAFETY: parameters are assumed to be valid + let r = unsafe { clone3(&mut args, size_of::()) }; if r != -1 { return Ok(r.try_into().unwrap()); } @@ -223,13 +227,15 @@ unsafe fn clone_clear_sighand() -> io::Result { // If CLONE_CLEAR_SIGHAND isn't available, fall back to resetting // all the signal handlers one by one. - let r = fork(); + // SAFETY: trivially safe, and we check the return value. 
+ let r = unsafe { fork() }; if r == -1 { return Err(io::Error::last_os_error()); } if r == 0 { for signum in 1.._NSIG { - let _ = signal(signum, SIG_DFL); + // SAFETY: trivially safe, we unset the user-space signal handler + let _ = unsafe { signal(signum, SIG_DFL) }; } } Ok(r.try_into().unwrap()) From 363273111a4428cc1e9532edad68e32951aee5cb Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 12 Aug 2025 12:59:23 +0200 Subject: [PATCH 170/294] build: treewide: fmt for edition 2024 `cargo +nightly fmt` Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- .rustfmt.toml | 2 +- arch/src/aarch64/mod.rs | 2 +- arch/src/lib.rs | 19 +- arch/src/riscv64/mod.rs | 2 +- arch/src/x86_64/mod.rs | 12 +- arch/src/x86_64/mptable.rs | 2 +- arch/src/x86_64/smbios.rs | 2 +- block/src/fixed_vhd.rs | 2 +- block/src/fixed_vhd_sync.rs | 2 +- block/src/lib.rs | 6 +- block/src/qcow/mod.rs | 2 +- block/src/qcow/raw_file.rs | 2 +- block/src/qcow/vec_cache.rs | 2 +- block/src/qcow_sync.rs | 2 +- block/src/raw_async.rs | 2 +- block/src/raw_async_aio.rs | 2 +- block/src/raw_sync.rs | 2 +- block/src/vhd.rs | 4 +- block/src/vhdx/mod.rs | 2 +- block/src/vhdx/vhdx_io.rs | 4 +- block/src/vhdx_sync.rs | 2 +- devices/src/acpi.rs | 4 +- devices/src/gic.rs | 2 +- devices/src/ioapic.rs | 2 +- devices/src/ivshmem.rs | 6 +- devices/src/legacy/cmos.rs | 2 +- devices/src/legacy/fw_cfg.rs | 6 +- devices/src/legacy/gpio_pl061.rs | 10 +- devices/src/legacy/rtc_pl031.rs | 10 +- devices/src/legacy/serial.rs | 2 +- devices/src/legacy/uart_pl011.rs | 2 +- devices/src/lib.rs | 2 +- devices/src/pvmemcontrol.rs | 2 +- devices/src/pvpanic.rs | 6 +- devices/src/tpm.rs | 2 +- .../src/arch/x86/emulator/instructions/mod.rs | 2 +- hypervisor/src/arch/x86/emulator/mod.rs | 10 +- hypervisor/src/cpu.rs | 8 +- hypervisor/src/hypervisor.rs | 2 +- hypervisor/src/kvm/aarch64/gic/dist_regs.rs | 2 +- hypervisor/src/kvm/aarch64/gic/icc_regs.rs | 3 +- hypervisor/src/kvm/aarch64/gic/redist_regs.rs | 10 +- hypervisor/src/kvm/aarch64/mod.rs | 4 +- hypervisor/src/kvm/mod.rs | 47 +-- hypervisor/src/kvm/riscv64/aia.rs | 2 +- hypervisor/src/kvm/riscv64/mod.rs | 4 +- hypervisor/src/kvm/x86_64/mod.rs | 11 +- hypervisor/src/lib.rs | 2 +- hypervisor/src/mshv/mod.rs | 22 +- hypervisor/src/mshv/x86_64/mod.rs | 34 +- hypervisor/src/vm.rs | 4 +- net_util/src/lib.rs | 12 +- net_util/src/open_tap.rs | 2 +- net_util/src/queue_pair.rs | 4 +- net_util/src/tap.rs | 26 +- pci/src/bus.rs | 2 +- pci/src/device.rs | 2 +- pci/src/lib.rs | 12 +- pci/src/vfio.rs | 23 +- pci/src/vfio_user.rs | 2 +- performance-metrics/src/main.rs | 2 +- performance-metrics/src/performance_tests.rs | 2 +- rate_limiter/src/group.rs | 2 +- rate_limiter/src/lib.rs | 4 +- serial_buffer/src/lib.rs | 2 +- src/bin/ch-remote.rs | 8 +- src/main.rs | 8 +- test_infra/src/lib.rs | 27 +- tests/integration.rs | 312 +++++++++++------- vhost_user_block/src/lib.rs | 4 +- vhost_user_net/src/lib.rs | 4 +- virtio-devices/src/balloon.rs | 18 +- virtio-devices/src/block.rs | 12 +- virtio-devices/src/console.rs | 6 +- virtio-devices/src/iommu.rs | 4 +- virtio-devices/src/lib.rs | 4 +- virtio-devices/src/mem.rs | 8 +- virtio-devices/src/net.rs | 10 +- virtio-devices/src/pmem.rs | 6 +- virtio-devices/src/rng.rs | 6 +- virtio-devices/src/seccomp_filters.rs | 16 +- virtio-devices/src/thread_helper.rs | 6 +- virtio-devices/src/transport/mod.rs | 2 +- virtio-devices/src/transport/pci_device.rs | 10 +- virtio-devices/src/vdpa.rs | 8 +- virtio-devices/src/vhost_user/blk.rs | 14 +- 
virtio-devices/src/vhost_user/fs.rs | 14 +- virtio-devices/src/vhost_user/mod.rs | 10 +- virtio-devices/src/vhost_user/net.rs | 14 +- .../src/vhost_user/vu_common_ctrl.rs | 8 +- virtio-devices/src/vsock/csm/connection.rs | 20 +- virtio-devices/src/vsock/csm/txbuf.rs | 2 +- virtio-devices/src/vsock/device.rs | 8 +- virtio-devices/src/vsock/mod.rs | 2 +- virtio-devices/src/vsock/packet.rs | 4 +- virtio-devices/src/vsock/unix/mod.rs | 2 +- virtio-devices/src/vsock/unix/muxer.rs | 2 +- virtio-devices/src/vsock/unix/muxer_killq.rs | 2 +- virtio-devices/src/vsock/unix/muxer_rxq.rs | 2 +- virtio-devices/src/watchdog.rs | 6 +- vm-allocator/src/page_size.rs | 2 +- vmm/src/acpi.rs | 6 +- vmm/src/api/dbus/mod.rs | 8 +- vmm/src/api/http/http_endpoint.rs | 2 +- vmm/src/api/http/mod.rs | 6 +- vmm/src/api/mod.rs | 4 +- vmm/src/clone3.rs | 2 +- vmm/src/config.rs | 43 ++- vmm/src/console_devices.rs | 10 +- vmm/src/cpu.rs | 50 +-- vmm/src/device_manager.rs | 38 +-- vmm/src/gdb.rs | 12 +- vmm/src/igvm/igvm_loader.rs | 12 +- vmm/src/igvm/loader.rs | 2 +- vmm/src/landlock.rs | 6 +- vmm/src/lib.rs | 140 ++++---- vmm/src/memory_manager.rs | 8 +- vmm/src/pci_segment.rs | 11 +- vmm/src/sigwinch_listener.rs | 17 +- vmm/src/vm.rs | 39 +-- vmm/src/vm_config.rs | 18 +- 121 files changed, 771 insertions(+), 653 deletions(-) diff --git a/.rustfmt.toml b/.rustfmt.toml index 754d7badfd..394a1065be 100644 --- a/.rustfmt.toml +++ b/.rustfmt.toml @@ -1,4 +1,4 @@ -edition = "2021" +edition = "2024" group_imports="StdExternalCrate" imports_granularity="Module" diff --git a/arch/src/aarch64/mod.rs b/arch/src/aarch64/mod.rs index f7a6c36539..f98942b83a 100644 --- a/arch/src/aarch64/mod.rs +++ b/arch/src/aarch64/mod.rs @@ -15,7 +15,7 @@ use std::sync::{Arc, Mutex}; use hypervisor::arch::aarch64::gic::Vgic; use hypervisor::arch::aarch64::regs::MPIDR_EL1; -use log::{log_enabled, Level}; +use log::{Level, log_enabled}; use thiserror::Error; use vm_memory::{Address, GuestAddress, GuestMemory, GuestMemoryAtomic}; diff --git a/arch/src/lib.rs b/arch/src/lib.rs index 2413fe2235..36fa20f13c 100644 --- a/arch/src/lib.rs +++ b/arch/src/lib.rs @@ -81,9 +81,9 @@ pub mod aarch64; #[cfg(target_arch = "aarch64")] pub use aarch64::{ - arch_memory_regions, configure_system, configure_vcpu, fdt::DeviceInfoForFdt, - get_host_cpu_phys_bits, initramfs_load_addr, layout, layout::CMDLINE_MAX_SIZE, - layout::IRQ_BASE, uefi, EntryPoint, _NSIG, + _NSIG, EntryPoint, arch_memory_regions, configure_system, configure_vcpu, + fdt::DeviceInfoForFdt, get_host_cpu_phys_bits, initramfs_load_addr, layout, + layout::CMDLINE_MAX_SIZE, layout::IRQ_BASE, uefi, }; /// Module for riscv64 related functionality. 
@@ -92,9 +92,9 @@ pub mod riscv64; #[cfg(target_arch = "riscv64")] pub use riscv64::{ - arch_memory_regions, configure_system, configure_vcpu, fdt::DeviceInfoForFdt, - get_host_cpu_phys_bits, initramfs_load_addr, layout, layout::CMDLINE_MAX_SIZE, - layout::IRQ_BASE, uefi, EntryPoint, _NSIG, + _NSIG, EntryPoint, arch_memory_regions, configure_system, configure_vcpu, + fdt::DeviceInfoForFdt, get_host_cpu_phys_bits, initramfs_load_addr, layout, + layout::CMDLINE_MAX_SIZE, layout::IRQ_BASE, uefi, }; #[cfg(target_arch = "x86_64")] @@ -102,10 +102,9 @@ pub mod x86_64; #[cfg(target_arch = "x86_64")] pub use x86_64::{ - arch_memory_regions, configure_system, configure_vcpu, generate_common_cpuid, - generate_ram_ranges, get_host_cpu_phys_bits, initramfs_load_addr, layout, - layout::CMDLINE_MAX_SIZE, layout::CMDLINE_START, regs, CpuidConfig, CpuidFeatureEntry, - EntryPoint, _NSIG, + _NSIG, CpuidConfig, CpuidFeatureEntry, EntryPoint, arch_memory_regions, configure_system, + configure_vcpu, generate_common_cpuid, generate_ram_ranges, get_host_cpu_phys_bits, + initramfs_load_addr, layout, layout::CMDLINE_MAX_SIZE, layout::CMDLINE_START, regs, }; /// Safe wrapper for `sysconf(_SC_PAGESIZE)`. diff --git a/arch/src/riscv64/mod.rs b/arch/src/riscv64/mod.rs index 62554bd1a7..6a0342b3cd 100644 --- a/arch/src/riscv64/mod.rs +++ b/arch/src/riscv64/mod.rs @@ -15,7 +15,7 @@ use std::fmt::Debug; use std::sync::{Arc, Mutex}; use hypervisor::arch::riscv64::aia::Vaia; -use log::{log_enabled, Level}; +use log::{Level, log_enabled}; use thiserror::Error; use vm_memory::{Address, GuestAddress, GuestMemory, GuestMemoryAtomic}; diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index 83cb0876c0..71edd4508e 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -14,7 +14,7 @@ mod mptable; pub mod regs; use std::mem; -use hypervisor::arch::x86::{CpuIdEntry, CPUID_FLAG_VALID_INDEX}; +use hypervisor::arch::x86::{CPUID_FLAG_VALID_INDEX, CpuIdEntry}; use hypervisor::{CpuVendor, HypervisorCpuError, HypervisorError}; use linux_loader::loader::bootparam::{boot_params, setup_header}; use linux_loader::loader::elf::start_info::{ @@ -525,9 +525,13 @@ impl CpuidFeatureEntry { error!( "Detected incompatible CPUID entry: leaf={:#02x} (subleaf={:#02x}), register='{:?}', \ compatible_check='{:?}', source VM feature='{:#04x}', destination VM feature'{:#04x}'.", - entry.function, entry.index, entry.feature_reg, - entry.compatible_check, src_vm_feature, dest_vm_feature - ); + entry.function, + entry.index, + entry.feature_reg, + entry.compatible_check, + src_vm_feature, + dest_vm_feature + ); compatible = false; } diff --git a/arch/src/x86_64/mptable.rs b/arch/src/x86_64/mptable.rs index d688e41374..2e2669b38a 100644 --- a/arch/src/x86_64/mptable.rs +++ b/arch/src/x86_64/mptable.rs @@ -12,9 +12,9 @@ use thiserror::Error; use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryError}; use super::MAX_SUPPORTED_CPUS_LEGACY; +use crate::GuestMemoryMmap; use crate::layout::{APIC_START, HIGH_RAM_START, IOAPIC_START}; use crate::x86_64::{get_x2apic_id, mpspec}; -use crate::GuestMemoryMmap; // This is a workaround to the Rust enforcement specifying that any implementation of a foreign // trait (in this case `ByteValued`) where: diff --git a/arch/src/x86_64/smbios.rs b/arch/src/x86_64/smbios.rs index 55a7df1e72..7d867a43c6 100644 --- a/arch/src/x86_64/smbios.rs +++ b/arch/src/x86_64/smbios.rs @@ -12,8 +12,8 @@ use thiserror::Error; use uuid::Uuid; use vm_memory::{Address, ByteValued, Bytes, 
GuestAddress}; -use crate::layout::SMBIOS_START; use crate::GuestMemoryMmap; +use crate::layout::SMBIOS_START; #[derive(Debug, Error)] pub enum Error { diff --git a/block/src/fixed_vhd.rs b/block/src/fixed_vhd.rs index 22ef4dd80d..379005ae28 100644 --- a/block/src/fixed_vhd.rs +++ b/block/src/fixed_vhd.rs @@ -6,8 +6,8 @@ use std::fs::File; use std::io::{Read, Seek, SeekFrom, Write}; use std::os::unix::io::{AsRawFd, RawFd}; -use crate::vhd::VhdFooter; use crate::BlockBackend; +use crate::vhd::VhdFooter; #[derive(Debug)] pub struct FixedVhd { diff --git a/block/src/fixed_vhd_sync.rs b/block/src/fixed_vhd_sync.rs index b1f2118f19..c125710698 100644 --- a/block/src/fixed_vhd_sync.rs +++ b/block/src/fixed_vhd_sync.rs @@ -7,12 +7,12 @@ use std::os::unix::io::{AsRawFd, RawFd}; use vmm_sys_util::eventfd::EventFd; +use crate::BlockBackend; use crate::async_io::{ AsyncIo, AsyncIoError, AsyncIoResult, BorrowedDiskFd, DiskFile, DiskFileError, DiskFileResult, }; use crate::fixed_vhd::FixedVhd; use crate::raw_sync::RawFileSync; -use crate::BlockBackend; pub struct FixedVhdDiskSync(FixedVhd); diff --git a/block/src/lib.rs b/block/src/lib.rs index 3cca348b3b..5599258e3e 100644 --- a/block/src/lib.rs +++ b/block/src/lib.rs @@ -31,7 +31,7 @@ pub mod vhd; pub mod vhdx; pub mod vhdx_sync; -use std::alloc::{alloc_zeroed, dealloc, Layout}; +use std::alloc::{Layout, alloc_zeroed, dealloc}; use std::collections::VecDeque; use std::fmt::Debug; use std::fs::File; @@ -44,8 +44,8 @@ use std::time::Instant; use std::{cmp, result}; #[cfg(feature = "io_uring")] -use io_uring::{opcode, IoUring, Probe}; -use libc::{ioctl, S_IFBLK, S_IFMT}; +use io_uring::{IoUring, Probe, opcode}; +use libc::{S_IFBLK, S_IFMT, ioctl}; use serde::{Deserialize, Serialize}; use smallvec::SmallVec; use thiserror::Error; diff --git a/block/src/qcow/mod.rs b/block/src/qcow/mod.rs index 4d84918e97..da68146919 100644 --- a/block/src/qcow/mod.rs +++ b/block/src/qcow/mod.rs @@ -24,11 +24,11 @@ use vmm_sys_util::file_traits::{FileSetLen, FileSync}; use vmm_sys_util::seek_hole::SeekHole; use vmm_sys_util::write_zeroes::{PunchHole, WriteZeroesAt}; +use crate::BlockBackend; use crate::qcow::qcow_raw_file::QcowRawFile; pub use crate::qcow::raw_file::RawFile; use crate::qcow::refcount::RefCount; use crate::qcow::vec_cache::{CacheMap, Cacheable, VecCache}; -use crate::BlockBackend; /// Nesting depth limit for disk formats that can open other disk files. 
const MAX_NESTING_DEPTH: u32 = 10; diff --git a/block/src/qcow/raw_file.rs b/block/src/qcow/raw_file.rs index 67bc99fca6..f0eff54df3 100644 --- a/block/src/qcow/raw_file.rs +++ b/block/src/qcow/raw_file.rs @@ -8,7 +8,7 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause -use std::alloc::{alloc_zeroed, dealloc, Layout}; +use std::alloc::{Layout, alloc_zeroed, dealloc}; use std::fs::{File, Metadata}; use std::io::{self, Read, Seek, SeekFrom, Write}; use std::os::unix::io::{AsRawFd, RawFd}; diff --git a/block/src/qcow/vec_cache.rs b/block/src/qcow/vec_cache.rs index 67068fdded..76e5d44430 100644 --- a/block/src/qcow/vec_cache.rs +++ b/block/src/qcow/vec_cache.rs @@ -4,8 +4,8 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause -use std::collections::hash_map::IterMut; use std::collections::HashMap; +use std::collections::hash_map::IterMut; use std::io; use std::ops::{Index, IndexMut}; use std::slice::SliceIndex; diff --git a/block/src/qcow_sync.rs b/block/src/qcow_sync.rs index 332db24868..cd6a1fb774 100644 --- a/block/src/qcow_sync.rs +++ b/block/src/qcow_sync.rs @@ -9,11 +9,11 @@ use std::os::fd::AsRawFd; use vmm_sys_util::eventfd::EventFd; +use crate::AsyncAdaptor; use crate::async_io::{ AsyncIo, AsyncIoResult, BorrowedDiskFd, DiskFile, DiskFileError, DiskFileResult, }; use crate::qcow::{QcowFile, RawFile, Result as QcowResult}; -use crate::AsyncAdaptor; pub struct QcowDiskSync { qcow_file: QcowFile, diff --git a/block/src/raw_async.rs b/block/src/raw_async.rs index b3c9882fbb..1a582073b0 100644 --- a/block/src/raw_async.rs +++ b/block/src/raw_async.rs @@ -6,7 +6,7 @@ use std::fs::File; use std::io::{Error, Seek, SeekFrom}; use std::os::unix::io::{AsRawFd, RawFd}; -use io_uring::{opcode, types, IoUring}; +use io_uring::{IoUring, opcode, types}; use vmm_sys_util::eventfd::EventFd; use crate::async_io::{ diff --git a/block/src/raw_async_aio.rs b/block/src/raw_async_aio.rs index 9ef0c62619..9a74fa41d7 100644 --- a/block/src/raw_async_aio.rs +++ b/block/src/raw_async_aio.rs @@ -12,10 +12,10 @@ use std::os::unix::io::{AsRawFd, RawFd}; use vmm_sys_util::aio; use vmm_sys_util::eventfd::EventFd; +use crate::DiskTopology; use crate::async_io::{ AsyncIo, AsyncIoError, AsyncIoResult, BorrowedDiskFd, DiskFile, DiskFileError, DiskFileResult, }; -use crate::DiskTopology; pub struct RawFileDiskAio { file: File, diff --git a/block/src/raw_sync.rs b/block/src/raw_sync.rs index 54ba1acca6..6b98147e19 100644 --- a/block/src/raw_sync.rs +++ b/block/src/raw_sync.rs @@ -9,10 +9,10 @@ use std::os::unix::io::{AsRawFd, RawFd}; use vmm_sys_util::eventfd::EventFd; +use crate::DiskTopology; use crate::async_io::{ AsyncIo, AsyncIoError, AsyncIoResult, BorrowedDiskFd, DiskFile, DiskFileError, DiskFileResult, }; -use crate::DiskTopology; pub struct RawFileDiskSync { file: File, diff --git a/block/src/vhd.rs b/block/src/vhd.rs index 2cc65ca0b9..6659ed0385 100644 --- a/block/src/vhd.rs +++ b/block/src/vhd.rs @@ -5,7 +5,7 @@ use std::fs::File; use std::io::{Seek, SeekFrom}; -use crate::{read_aligned_block_size, DiskTopology}; +use crate::{DiskTopology, read_aligned_block_size}; #[derive(Clone, Copy)] pub struct VhdFooter { @@ -123,7 +123,7 @@ mod tests { use vmm_sys_util::tempfile::TempFile; - use super::{is_fixed_vhd, VhdFooter}; + use super::{VhdFooter, is_fixed_vhd}; fn valid_fixed_vhd_footer() -> Vec { vec![ diff --git a/block/src/vhdx/mod.rs b/block/src/vhdx/mod.rs index 45974c5a3f..d46905d934 100644 --- a/block/src/vhdx/mod.rs +++ b/block/src/vhdx/mod.rs @@ -12,11 +12,11 @@ use remain::sorted; use 
thiserror::Error; use uuid::Uuid; +use crate::BlockBackend; use crate::vhdx::vhdx_bat::{BatEntry, VhdxBatError}; use crate::vhdx::vhdx_header::{RegionInfo, RegionTableEntry, VhdxHeader, VhdxHeaderError}; use crate::vhdx::vhdx_io::VhdxIoError; use crate::vhdx::vhdx_metadata::{DiskSpec, VhdxMetadataError}; -use crate::BlockBackend; mod vhdx_bat; mod vhdx_header; diff --git a/block/src/vhdx/vhdx_io.rs b/block/src/vhdx/vhdx_io.rs index 30e3837876..14feac8d9d 100644 --- a/block/src/vhdx/vhdx_io.rs +++ b/block/src/vhdx/vhdx_io.rs @@ -35,9 +35,7 @@ pub enum VhdxIoError { pub type Result = std::result::Result; macro_rules! align { - ($n:expr, $align:expr) => {{ - $n.div_ceil($align) * $align - }}; + ($n:expr, $align:expr) => {{ $n.div_ceil($align) * $align }}; } #[derive(Default)] diff --git a/block/src/vhdx_sync.rs b/block/src/vhdx_sync.rs index fd5888e638..01bcbf5e7f 100644 --- a/block/src/vhdx_sync.rs +++ b/block/src/vhdx_sync.rs @@ -8,11 +8,11 @@ use std::os::fd::AsRawFd; use vmm_sys_util::eventfd::EventFd; +use crate::AsyncAdaptor; use crate::async_io::{ AsyncIo, AsyncIoResult, BorrowedDiskFd, DiskFile, DiskFileError, DiskFileResult, }; use crate::vhdx::{Result as VhdxResult, Vhdx}; -use crate::AsyncAdaptor; pub struct VhdxDiskSync { vhdx_file: Vhdx, diff --git a/devices/src/acpi.rs b/devices/src/acpi.rs index 2a38f5974c..229b67be54 100644 --- a/devices/src/acpi.rs +++ b/devices/src/acpi.rs @@ -8,9 +8,9 @@ use std::sync::{Arc, Barrier}; use std::thread; use std::time::Instant; -use acpi_tables::{aml, Aml, AmlSink}; -use vm_device::interrupt::InterruptSourceGroup; +use acpi_tables::{Aml, AmlSink, aml}; use vm_device::BusDevice; +use vm_device::interrupt::InterruptSourceGroup; use vm_memory::GuestAddress; use vmm_sys_util::eventfd::EventFd; diff --git a/devices/src/gic.rs b/devices/src/gic.rs index dcae0be375..a157c3f25d 100644 --- a/devices/src/gic.rs +++ b/devices/src/gic.rs @@ -9,8 +9,8 @@ use std::sync::{Arc, Mutex}; use anyhow::anyhow; use arch::layout; -use hypervisor::arch::aarch64::gic::{GicState, Vgic, VgicConfig}; use hypervisor::CpuState; +use hypervisor::arch::aarch64::gic::{GicState, Vgic, VgicConfig}; use vm_device::interrupt::{ InterruptIndex, InterruptManager, InterruptSourceConfig, InterruptSourceGroup, LegacyIrqSourceConfig, MsiIrqGroupConfig, diff --git a/devices/src/ioapic.rs b/devices/src/ioapic.rs index 7adbe4f66c..97932f016d 100644 --- a/devices/src/ioapic.rs +++ b/devices/src/ioapic.rs @@ -14,11 +14,11 @@ use std::sync::{Arc, Barrier}; use byteorder::{ByteOrder, LittleEndian}; use serde::{Deserialize, Serialize}; +use vm_device::BusDevice; use vm_device::interrupt::{ InterruptIndex, InterruptManager, InterruptSourceConfig, InterruptSourceGroup, MsiIrqGroupConfig, MsiIrqSourceConfig, }; -use vm_device::BusDevice; use vm_memory::GuestAddress; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; use vmm_sys_util::eventfd::EventFd; diff --git a/devices/src/ivshmem.rs b/devices/src/ivshmem.rs index fc7c88de31..50c056edf3 100644 --- a/devices/src/ivshmem.rs +++ b/devices/src/ivshmem.rs @@ -12,9 +12,9 @@ use std::sync::{Arc, Barrier, Mutex}; use anyhow::anyhow; use byteorder::{ByteOrder, LittleEndian}; use pci::{ - BarReprogrammingParams, PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, - PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciSubclass, - PCI_CONFIGURATION_ID, + BarReprogrammingParams, PCI_CONFIGURATION_ID, PciBarConfiguration, PciBarPrefetchable, + PciBarRegionType, PciClassCode, 
PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, + PciSubclass, }; use serde::{Deserialize, Serialize}; use thiserror::Error; diff --git a/devices/src/legacy/cmos.rs b/devices/src/legacy/cmos.rs index 386281c67f..b0e140acc3 100644 --- a/devices/src/legacy/cmos.rs +++ b/devices/src/legacy/cmos.rs @@ -12,7 +12,7 @@ use std::{mem, thread}; // https://github.com/rust-lang/libc/issues/1848 #[cfg_attr(target_env = "musl", allow(deprecated))] use libc::time_t; -use libc::{clock_gettime, gmtime_r, timespec, tm, CLOCK_REALTIME}; +use libc::{CLOCK_REALTIME, clock_gettime, gmtime_r, timespec, tm}; use vm_device::BusDevice; use vmm_sys_util::eventfd::EventFd; diff --git a/devices/src/legacy/fw_cfg.rs b/devices/src/legacy/fw_cfg.rs index 02c52c707f..00b5bd7450 100644 --- a/devices/src/legacy/fw_cfg.rs +++ b/devices/src/legacy/fw_cfg.rs @@ -20,6 +20,7 @@ use std::{ }; use acpi_tables::rsdp::Rsdp; +use arch::RegionType; #[cfg(target_arch = "aarch64")] use arch::aarch64::layout::{ MEM_32BIT_DEVICES_START, MEM_32BIT_RESERVED_START, RAM_64BIT_START, RAM_START as HIGH_RAM_START, @@ -29,7 +30,6 @@ use arch::layout::{ EBDA_START, HIGH_RAM_START, MEM_32BIT_DEVICES_SIZE, MEM_32BIT_DEVICES_START, MEM_32BIT_RESERVED_START, PCI_MMCONFIG_SIZE, PCI_MMCONFIG_START, RAM_64BIT_START, }; -use arch::RegionType; use bitfield_struct::bitfield; #[cfg(target_arch = "x86_64")] use linux_loader::bootparam::boot_params; @@ -752,7 +752,9 @@ impl BusDevice for FwCfg { data.copy_from_slice(&addr_lo.to_be_bytes()); } _ => { - debug!("fw_cfg: read from unknown port {port:#x}: {size:#x} bytes and offset {offset:#x}."); + debug!( + "fw_cfg: read from unknown port {port:#x}: {size:#x} bytes and offset {offset:#x}." + ); } }; } diff --git a/devices/src/legacy/gpio_pl061.rs b/devices/src/legacy/gpio_pl061.rs index c7c66341a7..3a61238dd2 100644 --- a/devices/src/legacy/gpio_pl061.rs +++ b/devices/src/legacy/gpio_pl061.rs @@ -12,8 +12,8 @@ use std::{io, result}; use serde::{Deserialize, Serialize}; use thiserror::Error; -use vm_device::interrupt::InterruptSourceGroup; use vm_device::BusDevice; +use vm_device::interrupt::InterruptSourceGroup; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; use crate::{read_le_u32, write_le_u32}; @@ -28,10 +28,10 @@ const GPIORIE: u64 = 0x414; // Raw Interrupt Status Register const GPIOMIS: u64 = 0x418; // Masked Interrupt Status Register const GPIOIC: u64 = 0x41c; // Interrupt Clear Register const GPIOAFSEL: u64 = 0x420; // Mode Control Select Register - // From 0x424 to 0xFDC => reserved space. - // From 0xFE0 to 0xFFC => Peripheral and PrimeCell Identification Registers which are Read Only registers. - // These registers can conceptually be treated as a 32-bit register, and PartNumber[11:0] is used to identify the peripheral. - // We are putting the expected values (look at 'Reset value' column from above mentioned document) in an array. +// From 0x424 to 0xFDC => reserved space. +// From 0xFE0 to 0xFFC => Peripheral and PrimeCell Identification Registers which are Read Only registers. +// These registers can conceptually be treated as a 32-bit register, and PartNumber[11:0] is used to identify the peripheral. +// We are putting the expected values (look at 'Reset value' column from above mentioned document) in an array. 
const GPIO_ID: [u8; 8] = [0x61, 0x10, 0x14, 0x00, 0x0d, 0xf0, 0x05, 0xb1]; // ID Margins const GPIO_ID_LOW: u64 = 0xfe0; diff --git a/devices/src/legacy/rtc_pl031.rs b/devices/src/legacy/rtc_pl031.rs index 98bca77af2..9ff0c8c1b1 100644 --- a/devices/src/legacy/rtc_pl031.rs +++ b/devices/src/legacy/rtc_pl031.rs @@ -31,11 +31,11 @@ const RTCIMSC: u64 = 0x10; // Interrupt Mask Set or Clear Register. const RTCRIS: u64 = 0x14; // Raw Interrupt Status. const RTCMIS: u64 = 0x18; // Masked Interrupt Status. const RTCICR: u64 = 0x1c; // Interrupt Clear Register. - // From 0x020 to 0xFDC => reserved space. - // From 0xFE0 to 0x1000 => Peripheral and PrimeCell Identification Registers which are Read Only registers. - // AMBA standard devices have CIDs (Cell IDs) and PIDs (Peripheral IDs). The linux kernel will look for these in order to assert the identity - // of these devices (i.e look at the `amba_device_try_add` function). - // We are putting the expected values (look at 'Reset value' column from above mentioned document) in an array. +// From 0x020 to 0xFDC => reserved space. +// From 0xFE0 to 0x1000 => Peripheral and PrimeCell Identification Registers which are Read Only registers. +// AMBA standard devices have CIDs (Cell IDs) and PIDs (Peripheral IDs). The linux kernel will look for these in order to assert the identity +// of these devices (i.e look at the `amba_device_try_add` function). +// We are putting the expected values (look at 'Reset value' column from above mentioned document) in an array. const PL031_ID: [u8; 8] = [0x31, 0x10, 0x14, 0x00, 0x0d, 0xf0, 0x05, 0xb1]; // We are only interested in the margins. const AMBA_ID_LOW: u64 = 0xFE0; diff --git a/devices/src/legacy/serial.rs b/devices/src/legacy/serial.rs index 973c96b0c5..cbfb2c10b9 100644 --- a/devices/src/legacy/serial.rs +++ b/devices/src/legacy/serial.rs @@ -10,8 +10,8 @@ use std::sync::{Arc, Barrier}; use std::{io, result}; use serde::{Deserialize, Serialize}; -use vm_device::interrupt::InterruptSourceGroup; use vm_device::BusDevice; +use vm_device::interrupt::InterruptSourceGroup; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; use vmm_sys_util::errno::Result; diff --git a/devices/src/legacy/uart_pl011.rs b/devices/src/legacy/uart_pl011.rs index b5603808bf..364dd59278 100644 --- a/devices/src/legacy/uart_pl011.rs +++ b/devices/src/legacy/uart_pl011.rs @@ -13,8 +13,8 @@ use std::{io, result}; use serde::{Deserialize, Serialize}; use thiserror::Error; -use vm_device::interrupt::InterruptSourceGroup; use vm_device::BusDevice; +use vm_device::interrupt::InterruptSourceGroup; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; use crate::{read_le_u32, write_le_u32}; diff --git a/devices/src/lib.rs b/devices/src/lib.rs index d7ac32d9d5..0c4bfb8ca4 100644 --- a/devices/src/lib.rs +++ b/devices/src/lib.rs @@ -37,7 +37,7 @@ pub mod tpm; pub use self::acpi::{AcpiGedDevice, AcpiPmTimerDevice, AcpiShutdownDevice}; #[cfg(feature = "ivshmem")] pub use self::ivshmem::IvshmemDevice; -pub use self::pvpanic::{PvPanicDevice, PVPANIC_DEVICE_MMIO_SIZE}; +pub use self::pvpanic::{PVPANIC_DEVICE_MMIO_SIZE, PvPanicDevice}; bitflags! 
{ pub struct AcpiNotificationFlags: u8 { diff --git a/devices/src/pvmemcontrol.rs b/devices/src/pvmemcontrol.rs index ff2190933c..2977a9a527 100644 --- a/devices/src/pvmemcontrol.rs +++ b/devices/src/pvmemcontrol.rs @@ -520,7 +520,7 @@ impl PvmemcontrolBusDevice { ret_value: get_page_size().into(), arg0: MAJOR_VERSION.into(), arg1: MINOR_VERSION.into(), - }) + }); } FunctionCode::Dontneed => self.madvise(addr, length, libc::MADV_DONTNEED), FunctionCode::Remove => self.madvise(addr, length, libc::MADV_REMOVE), diff --git a/devices/src/pvpanic.rs b/devices/src/pvpanic.rs index 98e7bfa9cd..4fd61188b0 100644 --- a/devices/src/pvpanic.rs +++ b/devices/src/pvpanic.rs @@ -9,9 +9,9 @@ use std::sync::{Arc, Barrier, Mutex}; use anyhow::anyhow; use pci::{ - BarReprogrammingParams, PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, - PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciSubclass, - PCI_CONFIGURATION_ID, + BarReprogrammingParams, PCI_CONFIGURATION_ID, PciBarConfiguration, PciBarPrefetchable, + PciBarRegionType, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, + PciSubclass, }; use serde::{Deserialize, Serialize}; use thiserror::Error; diff --git a/devices/src/tpm.rs b/devices/src/tpm.rs index c6ed5ce0a9..4219f58308 100644 --- a/devices/src/tpm.rs +++ b/devices/src/tpm.rs @@ -12,8 +12,8 @@ use arch::aarch64::layout::{TPM_SIZE, TPM_START}; #[cfg(target_arch = "x86_64")] use arch::x86_64::layout::{TPM_SIZE, TPM_START}; use thiserror::Error; -use tpm::emulator::{BackendCmd, Emulator}; use tpm::TPM_CRB_BUFFER_MAX; +use tpm::emulator::{BackendCmd, Emulator}; use vm_device::BusDevice; #[derive(Error, Debug)] diff --git a/hypervisor/src/arch/x86/emulator/instructions/mod.rs b/hypervisor/src/arch/x86/emulator/instructions/mod.rs index c2d39aea09..945ce16bac 100644 --- a/hypervisor/src/arch/x86/emulator/instructions/mod.rs +++ b/hypervisor/src/arch/x86/emulator/instructions/mod.rs @@ -7,8 +7,8 @@ use iced_x86::*; use crate::arch::emulator::{EmulationError, PlatformEmulator, PlatformError}; -use crate::arch::x86::emulator::CpuStateManager; use crate::arch::x86::Exception; +use crate::arch::x86::emulator::CpuStateManager; pub mod cmp; pub mod mov; diff --git a/hypervisor/src/arch/x86/emulator/mod.rs b/hypervisor/src/arch/x86/emulator/mod.rs index 61bbd56fdf..9cc8ac4721 100644 --- a/hypervisor/src/arch/x86/emulator/mod.rs +++ b/hypervisor/src/arch/x86/emulator/mod.rs @@ -7,13 +7,13 @@ use anyhow::Context; use iced_x86::*; +use crate::StandardRegisters; use crate::arch::emulator::{EmulationError, EmulationResult, PlatformEmulator, PlatformError}; use crate::arch::x86::emulator::instructions::*; use crate::arch::x86::regs::{CR0_PE, EFER_LMA}; use crate::arch::x86::{ - segment_type_expand_down, segment_type_ro, Exception, SegmentRegister, SpecialRegisters, + Exception, SegmentRegister, SpecialRegisters, segment_type_expand_down, segment_type_ro, }; -use crate::StandardRegisters; #[macro_use] mod instructions; @@ -254,7 +254,7 @@ impl CpuStateManager for EmulatorCpuState { return Err(PlatformError::InvalidRegister(anyhow!( "read_reg invalid GPR {:?}", r - ))) + ))); } }; @@ -375,7 +375,7 @@ impl CpuStateManager for EmulatorCpuState { return Err(PlatformError::InvalidRegister(anyhow!( "write_reg invalid register {:?}", reg - ))) + ))); } } @@ -660,9 +660,9 @@ mod mock_vmm { use std::sync::{Arc, Mutex}; use super::*; + use crate::StandardRegisters; use crate::arch::x86::emulator::EmulatorCpuState as CpuState; use crate::arch::x86::gdt::{gdt_entry, 
segment_from_gdt}; - use crate::StandardRegisters; #[derive(Debug, Clone)] pub struct MockVmm { diff --git a/hypervisor/src/cpu.rs b/hypervisor/src/cpu.rs index 46401cc66c..5c377c5d1d 100644 --- a/hypervisor/src/cpu.rs +++ b/hypervisor/src/cpu.rs @@ -17,14 +17,14 @@ use thiserror::Error; #[cfg(not(target_arch = "riscv64"))] use vm_memory::GuestAddress; -#[cfg(target_arch = "x86_64")] -use crate::arch::x86::{CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters}; -#[cfg(feature = "tdx")] -use crate::kvm::{TdxExitDetails, TdxExitStatus}; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use crate::RegList; #[cfg(target_arch = "aarch64")] use crate::VcpuInit; +#[cfg(target_arch = "x86_64")] +use crate::arch::x86::{CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters}; +#[cfg(feature = "tdx")] +use crate::kvm::{TdxExitDetails, TdxExitStatus}; use crate::{CpuState, MpState, StandardRegisters}; #[cfg(target_arch = "x86_64")] diff --git a/hypervisor/src/hypervisor.rs b/hypervisor/src/hypervisor.rs index 4fc98fb8bb..13d00fa009 100644 --- a/hypervisor/src/hypervisor.rs +++ b/hypervisor/src/hypervisor.rs @@ -13,6 +13,7 @@ use std::sync::Arc; use thiserror::Error; +use crate::HypervisorType; #[cfg(target_arch = "x86_64")] use crate::arch::x86::CpuIdEntry; #[cfg(target_arch = "x86_64")] @@ -20,7 +21,6 @@ use crate::cpu::CpuVendor; #[cfg(feature = "tdx")] use crate::kvm::TdxCapabilities; use crate::vm::Vm; -use crate::HypervisorType; #[derive(Error, Debug)] pub enum HypervisorError { diff --git a/hypervisor/src/kvm/aarch64/gic/dist_regs.rs b/hypervisor/src/kvm/aarch64/gic/dist_regs.rs index 0aa3da76de..9a3c719e7c 100644 --- a/hypervisor/src/kvm/aarch64/gic/dist_regs.rs +++ b/hypervisor/src/kvm/aarch64/gic/dist_regs.rs @@ -6,7 +6,7 @@ use kvm_ioctls::DeviceFd; use crate::arch::aarch64::gic::{Error, Result}; use crate::device::HypervisorDeviceError; use crate::kvm::kvm_bindings::{ - kvm_device_attr, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + KVM_DEV_ARM_VGIC_GRP_DIST_REGS, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, kvm_device_attr, }; /* diff --git a/hypervisor/src/kvm/aarch64/gic/icc_regs.rs b/hypervisor/src/kvm/aarch64/gic/icc_regs.rs index f993581840..b084c89899 100644 --- a/hypervisor/src/kvm/aarch64/gic/icc_regs.rs +++ b/hypervisor/src/kvm/aarch64/gic/icc_regs.rs @@ -7,10 +7,11 @@ use kvm_ioctls::DeviceFd; use crate::arch::aarch64::gic::{Error, Result}; use crate::device::HypervisorDeviceError; use crate::kvm::kvm_bindings::{ - kvm_device_attr, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, KVM_REG_ARM64_SYSREG_CRM_MASK, + KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRM_SHIFT, KVM_REG_ARM64_SYSREG_CRN_MASK, KVM_REG_ARM64_SYSREG_CRN_SHIFT, KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP0_SHIFT, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP1_SHIFT, KVM_REG_ARM64_SYSREG_OP2_MASK, KVM_REG_ARM64_SYSREG_OP2_SHIFT, + kvm_device_attr, }; const KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT: u32 = 32; diff --git a/hypervisor/src/kvm/aarch64/gic/redist_regs.rs b/hypervisor/src/kvm/aarch64/gic/redist_regs.rs index 7adc0efefc..c06818e046 100644 --- a/hypervisor/src/kvm/aarch64/gic/redist_regs.rs +++ b/hypervisor/src/kvm/aarch64/gic/redist_regs.rs @@ -4,15 +4,15 @@ use kvm_ioctls::DeviceFd; +use crate::CpuState; use crate::arch::aarch64::gic::{Error, Result}; use crate::device::HypervisorDeviceError; +use crate::kvm::VcpuKvmState; use crate::kvm::kvm_bindings::{ - kvm_device_attr, kvm_one_reg, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, KVM_REG_ARM64, - 
KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP0_SHIFT, - KVM_REG_ARM64_SYSREG_OP2_MASK, KVM_REG_ARM64_SYSREG_OP2_SHIFT, KVM_REG_SIZE_U64, + KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, KVM_REG_ARM64, KVM_REG_ARM64_SYSREG, + KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP0_SHIFT, KVM_REG_ARM64_SYSREG_OP2_MASK, + KVM_REG_ARM64_SYSREG_OP2_SHIFT, KVM_REG_SIZE_U64, kvm_device_attr, kvm_one_reg, }; -use crate::kvm::VcpuKvmState; -use crate::CpuState; // Relevant redistributor registers that we want to save/restore. const GICR_CTLR: u32 = 0x0000; diff --git a/hypervisor/src/kvm/aarch64/mod.rs b/hypervisor/src/kvm/aarch64/mod.rs index 20fef72444..a94ed55f1c 100644 --- a/hypervisor/src/kvm/aarch64/mod.rs +++ b/hypervisor/src/kvm/aarch64/mod.rs @@ -11,8 +11,8 @@ pub mod gic; use kvm_bindings::{ - kvm_mp_state, kvm_one_reg, kvm_regs, KVM_REG_ARM_COPROC_MASK, KVM_REG_ARM_CORE, - KVM_REG_SIZE_MASK, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, + KVM_REG_ARM_COPROC_MASK, KVM_REG_ARM_CORE, KVM_REG_SIZE_MASK, KVM_REG_SIZE_U32, + KVM_REG_SIZE_U64, kvm_mp_state, kvm_one_reg, kvm_regs, }; pub use kvm_ioctls::{Cap, Kvm}; use serde::{Deserialize, Serialize}; diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 9aaafd5b07..6becd0e411 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -29,7 +29,7 @@ use vmm_sys_util::eventfd::EventFd; #[cfg(target_arch = "aarch64")] use crate::aarch64::gic::KvmGicV3Its; #[cfg(target_arch = "aarch64")] -pub use crate::aarch64::{check_required_kvm_extensions, is_system_register, VcpuKvmState}; +pub use crate::aarch64::{VcpuKvmState, check_required_kvm_extensions, is_system_register}; #[cfg(target_arch = "aarch64")] use crate::arch::aarch64::gic::{Vgic, VgicConfig}; #[cfg(target_arch = "riscv64")] @@ -40,36 +40,37 @@ use crate::arm64_core_reg_id; use crate::riscv64::aia::KvmAiaImsics; #[cfg(target_arch = "riscv64")] pub use crate::riscv64::{ - aia::AiaImsicsState as AiaState, check_required_kvm_extensions, is_non_core_register, - VcpuKvmState, + VcpuKvmState, aia::AiaImsicsState as AiaState, check_required_kvm_extensions, + is_non_core_register, }; #[cfg(target_arch = "riscv64")] use crate::riscv64_reg_id; use crate::vm::{self, InterruptSourceConfig, VmOps}; -use crate::{cpu, hypervisor, HypervisorType}; +use crate::{HypervisorType, cpu, hypervisor}; // x86_64 dependencies #[cfg(target_arch = "x86_64")] pub mod x86_64; #[cfg(target_arch = "x86_64")] use kvm_bindings::{ - kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP, - KVM_CAP_X2APIC_API, KVM_GUESTDBG_USE_HW_BP, KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK, - KVM_X2APIC_API_USE_32BIT_IDS, + KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP, KVM_CAP_X2APIC_API, KVM_GUESTDBG_USE_HW_BP, + KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK, KVM_X2APIC_API_USE_32BIT_IDS, MsrList, kvm_enable_cap, + kvm_msr_entry, }; #[cfg(target_arch = "x86_64")] use x86_64::check_required_kvm_extensions; #[cfg(target_arch = "x86_64")] pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState}; +#[cfg(target_arch = "x86_64")] +use crate::ClockData; #[cfg(target_arch = "x86_64")] use crate::arch::x86::{ - CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS, + CpuIdEntry, FpuState, LapicState, MsrEntry, NUM_IOAPIC_PINS, SpecialRegisters, XsaveState, }; -#[cfg(target_arch = "x86_64")] -use crate::ClockData; use crate::{ - CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters, UserMemoryRegion, + CpuState, 
IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters, USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE, + UserMemoryRegion, }; // aarch64 dependencies #[cfg(target_arch = "aarch64")] @@ -86,23 +87,23 @@ use std::mem; #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub use kvm_bindings::kvm_vcpu_events as VcpuEvents; pub use kvm_bindings::{ - kvm_clock_data, kvm_create_device, kvm_create_device as CreateDevice, - kvm_device_attr as DeviceAttr, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug, - kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_run, kvm_userspace_memory_region, KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, - KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID, + KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID, kvm_clock_data, + kvm_create_device, kvm_create_device as CreateDevice, kvm_device_attr as DeviceAttr, + kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug, kvm_irq_routing, kvm_irq_routing_entry, + kvm_mp_state, kvm_run, kvm_userspace_memory_region, }; #[cfg(target_arch = "aarch64")] use kvm_bindings::{ - kvm_regs, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM64_SYSREG, + KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM_CORE, KVM_REG_ARM64, KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK, KVM_REG_ARM64_SYSREG_OP0_MASK, - KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK, KVM_REG_ARM_CORE, - KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, + KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK, KVM_REG_SIZE_U32, + KVM_REG_SIZE_U64, KVM_REG_SIZE_U128, kvm_regs, user_pt_regs, }; #[cfg(target_arch = "riscv64")] -use kvm_bindings::{kvm_riscv_core, KVM_REG_RISCV_CORE}; +use kvm_bindings::{KVM_REG_RISCV_CORE, kvm_riscv_core}; #[cfg(feature = "tdx")] -use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO}; +use kvm_bindings::{KVMIO, kvm_run__bindgen_ty_1}; pub use kvm_ioctls::{Cap, Kvm, VcpuExit}; use thiserror::Error; use vfio_ioctls::VfioDeviceFd; @@ -112,10 +113,10 @@ use vmm_sys_util::ioctl_io_nr; use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_iowr_nr}; pub use {kvm_bindings, kvm_ioctls}; -#[cfg(target_arch = "aarch64")] -use crate::arch::aarch64::regs; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use crate::RegList; +#[cfg(target_arch = "aarch64")] +use crate::arch::aarch64::regs; #[cfg(target_arch = "x86_64")] ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a); @@ -2866,7 +2867,7 @@ impl cpu::Vcpu for KvmVcpu { /// Return the list of initial MSR entries for a VCPU /// fn boot_msr_entries(&self) -> Vec { - use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB}; + use crate::arch::x86::{MTRR_ENABLE, MTRR_MEM_TYPE_WB, msr_index}; [ msr!(msr_index::MSR_IA32_SYSENTER_CS), diff --git a/hypervisor/src/kvm/riscv64/aia.rs b/hypervisor/src/kvm/riscv64/aia.rs index 607c9034c5..1aebbafbe4 100644 --- a/hypervisor/src/kvm/riscv64/aia.rs +++ b/hypervisor/src/kvm/riscv64/aia.rs @@ -7,10 +7,10 @@ use std::any::Any; use kvm_ioctls::DeviceFd; use serde::{Deserialize, Serialize}; +use crate::Vm; use crate::arch::riscv64::aia::{Error, Result, Vaia, VaiaConfig}; use crate::device::HypervisorDeviceError; use crate::kvm::KvmVm; -use crate::Vm; pub struct KvmAiaImsics { /// The KVM device for the Aia diff --git a/hypervisor/src/kvm/riscv64/mod.rs b/hypervisor/src/kvm/riscv64/mod.rs index c589b488a3..07f54efb3d 100644 --- a/hypervisor/src/kvm/riscv64/mod.rs +++ 
b/hypervisor/src/kvm/riscv64/mod.rs @@ -5,8 +5,8 @@ pub mod aia; use kvm_bindings::{ - kvm_mp_state, kvm_one_reg, kvm_riscv_core, KVM_REG_RISCV_CORE, KVM_REG_RISCV_TYPE_MASK, - KVM_REG_SIZE_MASK, KVM_REG_SIZE_U64, + KVM_REG_RISCV_CORE, KVM_REG_RISCV_TYPE_MASK, KVM_REG_SIZE_MASK, KVM_REG_SIZE_U64, kvm_mp_state, + kvm_one_reg, kvm_riscv_core, }; pub use kvm_ioctls::{Cap, Kvm}; use serde::{Deserialize, Serialize}; diff --git a/hypervisor/src/kvm/x86_64/mod.rs b/hypervisor/src/kvm/x86_64/mod.rs index 4cf05a1ac3..a01fb9d727 100644 --- a/hypervisor/src/kvm/x86_64/mod.rs +++ b/hypervisor/src/kvm/x86_64/mod.rs @@ -13,18 +13,17 @@ use serde::{Deserialize, Serialize}; /// Export generically-named wrappers of kvm-bindings for Unix-based platforms /// pub use { - kvm_bindings::kvm_cpuid_entry2, kvm_bindings::kvm_dtable, kvm_bindings::kvm_fpu, - kvm_bindings::kvm_lapic_state, kvm_bindings::kvm_mp_state as MpState, + kvm_bindings::CpuId, kvm_bindings::KVM_CPUID_FLAG_SIGNIFCANT_INDEX, kvm_bindings::MsrList, + kvm_bindings::Msrs as MsrEntries, kvm_bindings::kvm_cpuid_entry2, kvm_bindings::kvm_dtable, + kvm_bindings::kvm_fpu, kvm_bindings::kvm_lapic_state, kvm_bindings::kvm_mp_state as MpState, kvm_bindings::kvm_msr_entry, kvm_bindings::kvm_regs, kvm_bindings::kvm_segment, kvm_bindings::kvm_sregs, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_bindings::kvm_xcrs as ExtendedControlRegisters, kvm_bindings::kvm_xsave, - kvm_bindings::CpuId, kvm_bindings::MsrList, kvm_bindings::Msrs as MsrEntries, - kvm_bindings::KVM_CPUID_FLAG_SIGNIFCANT_INDEX, }; use crate::arch::x86::{ - CpuIdEntry, DescriptorTable, FpuState, LapicState, MsrEntry, SegmentRegister, SpecialRegisters, - XsaveState, CPUID_FLAG_VALID_INDEX, + CPUID_FLAG_VALID_INDEX, CpuIdEntry, DescriptorTable, FpuState, LapicState, MsrEntry, + SegmentRegister, SpecialRegisters, XsaveState, }; use crate::kvm::{Cap, Kvm, KvmError, KvmResult}; diff --git a/hypervisor/src/lib.rs b/hypervisor/src/lib.rs index af383e3f3c..205691a421 100644 --- a/hypervisor/src/lib.rs +++ b/hypervisor/src/lib.rs @@ -61,7 +61,7 @@ pub use device::HypervisorDeviceError; #[cfg(all(feature = "kvm", target_arch = "aarch64"))] pub use kvm::aarch64; #[cfg(all(feature = "kvm", target_arch = "riscv64"))] -pub use kvm::{riscv64, AiaState}; +pub use kvm::{AiaState, riscv64}; pub use vm::{ DataMatch, HypervisorVmError, InterruptSourceConfig, LegacyIrqSourceConfig, MsiIrqSourceConfig, Vm, VmOps, diff --git a/hypervisor/src/mshv/mod.rs b/hypervisor/src/mshv/mod.rs index 93634ada0f..bc03c44dab 100644 --- a/hypervisor/src/mshv/mod.rs +++ b/hypervisor/src/mshv/mod.rs @@ -14,7 +14,7 @@ use arc_swap::ArcSwap; use mshv_bindings::*; #[cfg(target_arch = "x86_64")] use mshv_ioctls::InterruptRequest; -use mshv_ioctls::{set_registers_64, Mshv, NoDatamatch, VcpuFd, VmFd, VmType}; +use mshv_ioctls::{Mshv, NoDatamatch, VcpuFd, VmFd, VmType, set_registers_64}; use vfio_ioctls::VfioDeviceFd; use vm::DataMatch; #[cfg(feature = "sev_snp")] @@ -32,7 +32,7 @@ use crate::arch::x86::emulator::Emulator; use crate::mshv::aarch64::emulator; use crate::mshv::emulator::MshvEmulatorContext; use crate::vm::{self, InterruptSourceConfig, VmOps}; -use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType}; +use crate::{HypervisorType, cpu, hypervisor, vec_with_array_field}; #[cfg(feature = "sev_snp")] mod snp_constants; // x86_64 dependencies @@ -45,10 +45,10 @@ use std::os::unix::io::AsRawFd; #[cfg(target_arch = "aarch64")] use std::sync::Mutex; -#[cfg(target_arch = "aarch64")] -use aarch64::gic::{MshvGicV2M, 
BASE_SPI_IRQ}; #[cfg(target_arch = "aarch64")] pub use aarch64::VcpuMshvState; +#[cfg(target_arch = "aarch64")] +use aarch64::gic::{BASE_SPI_IRQ, MshvGicV2M}; #[cfg(feature = "sev_snp")] use igvm_defs::IGVM_VHS_SNP_ID_BLOCK; #[cfg(feature = "sev_snp")] @@ -57,7 +57,7 @@ use vmm_sys_util::eventfd::EventFd; #[cfg(target_arch = "x86_64")] pub use x86_64::*; #[cfg(target_arch = "x86_64")] -pub use x86_64::{emulator, VcpuMshvState}; +pub use x86_64::{VcpuMshvState, emulator}; /// /// Export generically-named wrappers of mshv-bindings for Unix-based platforms /// @@ -66,18 +66,18 @@ pub use { mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls, mshv_ioctls::DeviceFd, }; +#[cfg(target_arch = "x86_64")] +use crate::ClockData; #[cfg(target_arch = "aarch64")] use crate::arch::aarch64::gic::{Vgic, VgicConfig}; #[cfg(target_arch = "aarch64")] use crate::arch::aarch64::regs; #[cfg(target_arch = "x86_64")] use crate::arch::x86::{CpuIdEntry, FpuState, MsrEntry}; -#[cfg(target_arch = "x86_64")] -use crate::ClockData; use crate::{ - CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion, - USER_MEMORY_REGION_ADJUSTABLE, USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ, - USER_MEMORY_REGION_WRITE, + CpuState, IoEventAddress, IrqRoutingEntry, MpState, USER_MEMORY_REGION_ADJUSTABLE, + USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE, + UserMemoryRegion, }; pub const PAGE_SHIFT: usize = 12; @@ -1512,7 +1512,7 @@ impl cpu::Vcpu for MshvVcpu { /// Return the list of initial MSR entries for a VCPU /// fn boot_msr_entries(&self) -> Vec { - use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB}; + use crate::arch::x86::{MTRR_ENABLE, MTRR_MEM_TYPE_WB, msr_index}; [ msr!(msr_index::MSR_IA32_SYSENTER_CS), diff --git a/hypervisor/src/mshv/x86_64/mod.rs b/hypervisor/src/mshv/x86_64/mod.rs index 1853d234d8..a25dcc3ca7 100644 --- a/hypervisor/src/mshv/x86_64/mod.rs +++ b/hypervisor/src/mshv/x86_64/mod.rs @@ -21,16 +21,16 @@ pub mod emulator; /// Export generically-named wrappers of mshv_bindings for Unix-based platforms /// pub use { - mshv_bindings::hv_cpuid_entry, mshv_bindings::mshv_user_mem_region as MemoryRegion, - mshv_bindings::msr_entry, mshv_bindings::AllVpStateComponents, mshv_bindings::CpuId, - mshv_bindings::DebugRegisters, mshv_bindings::FloatingPointUnit, - mshv_bindings::LapicState as MshvLapicState, mshv_bindings::MiscRegs as MiscRegisters, - mshv_bindings::MsrList, mshv_bindings::Msrs as MsrEntries, mshv_bindings::Msrs, + mshv_bindings::AllVpStateComponents, mshv_bindings::CpuId, mshv_bindings::DebugRegisters, + mshv_bindings::FloatingPointUnit, mshv_bindings::LapicState as MshvLapicState, + mshv_bindings::MiscRegs as MiscRegisters, mshv_bindings::MsrList, + mshv_bindings::Msrs as MsrEntries, mshv_bindings::Msrs, mshv_bindings::SegmentRegister as MshvSegmentRegister, mshv_bindings::SpecialRegisters as MshvSpecialRegisters, mshv_bindings::StandardRegisters as MshvStandardRegisters, mshv_bindings::SuspendRegisters, mshv_bindings::TableRegister, mshv_bindings::VcpuEvents, mshv_bindings::XSave as Xsave, - mshv_bindings::Xcrs as ExtendedControlRegisters, + mshv_bindings::Xcrs as ExtendedControlRegisters, mshv_bindings::hv_cpuid_entry, + mshv_bindings::mshv_user_mem_region as MemoryRegion, mshv_bindings::msr_entry, }; #[derive(Clone, Serialize, Deserialize)] @@ -60,16 +60,18 @@ impl fmt::Display for VcpuMshvState { msr_entries[i][1] = entry.data; msr_entries[i][0] = entry.index as u64; } - write!(f, "Number of MSRs: {}: MSRs: {:#010X?}, -- VCPU 
Events: {:?} -- Standard registers: {:?} Special Registers: {:?} ---- Floating Point Unit: {:?} --- Extended Control Register: {:?} --- DBG: {:?} --- VP States: {:?}", - msr_entries.len(), - msr_entries, - self.vcpu_events, - self.regs, - self.sregs, - self.fpu, - self.xcrs, - self.dbg, - self.vp_states, + write!( + f, + "Number of MSRs: {}: MSRs: {:#010X?}, -- VCPU Events: {:?} -- Standard registers: {:?} Special Registers: {:?} ---- Floating Point Unit: {:?} --- Extended Control Register: {:?} --- DBG: {:?} --- VP States: {:?}", + msr_entries.len(), + msr_entries, + self.vcpu_events, + self.regs, + self.sregs, + self.fpu, + self.xcrs, + self.dbg, + self.vp_states, ) } } diff --git a/hypervisor/src/vm.rs b/hypervisor/src/vm.rs index bd9c0e6746..a2f7921314 100644 --- a/hypervisor/src/vm.rs +++ b/hypervisor/src/vm.rs @@ -20,6 +20,8 @@ use igvm_defs::IGVM_VHS_SNP_ID_BLOCK; use thiserror::Error; use vmm_sys_util::eventfd::EventFd; +#[cfg(target_arch = "x86_64")] +use crate::ClockData; #[cfg(target_arch = "aarch64")] use crate::arch::aarch64::gic::{Vgic, VgicConfig}; #[cfg(target_arch = "riscv64")] @@ -27,8 +29,6 @@ use crate::arch::riscv64::aia::{Vaia, VaiaConfig}; #[cfg(feature = "tdx")] use crate::arch::x86::CpuIdEntry; use crate::cpu::Vcpu; -#[cfg(target_arch = "x86_64")] -use crate::ClockData; use crate::{IoEventAddress, IrqRoutingEntry, UserMemoryRegion}; /// diff --git a/net_util/src/lib.rs b/net_util/src/lib.rs index a28bcc433a..4ad7a1c77e 100644 --- a/net_util/src/lib.rs +++ b/net_util/src/lib.rs @@ -23,18 +23,18 @@ use std::{io, mem, net}; use serde::{Deserialize, Serialize}; use thiserror::Error; use virtio_bindings::virtio_net::{ - virtio_net_hdr_v1, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN, - VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_TSO4, - VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MQ, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN, VIRTIO_NET_F_GUEST_CSUM, + VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MQ, virtio_net_hdr_v1, }; -use vm_memory::bitmap::AtomicBitmap; use vm_memory::ByteValued; +use vm_memory::bitmap::AtomicBitmap; type GuestMemoryMmap = vm_memory::GuestMemoryMmap; pub use ctrl_queue::{CtrlQueue, Error as CtrlQueueError}; -pub use mac::{MacAddr, MAC_ADDR_LEN}; -pub use open_tap::{open_tap, Error as OpenTapError}; +pub use mac::{MAC_ADDR_LEN, MacAddr}; +pub use open_tap::{Error as OpenTapError, open_tap}; pub use queue_pair::{NetCounters, NetQueuePair, NetQueuePairError, RxVirtio, TxVirtio}; pub use tap::{Error as TapError, Tap}; diff --git a/net_util/src/open_tap.rs b/net_util/src/open_tap.rs index 21c48f8392..61e763ba20 100644 --- a/net_util/src/open_tap.rs +++ b/net_util/src/open_tap.rs @@ -8,7 +8,7 @@ use std::{fs, io}; use thiserror::Error; -use super::{vnet_hdr_len, MacAddr, Tap, TapError}; +use super::{MacAddr, Tap, TapError, vnet_hdr_len}; #[derive(Error, Debug)] pub enum Error { diff --git a/net_util/src/queue_pair.rs b/net_util/src/queue_pair.rs index f28d759fe5..63fe677509 100644 --- a/net_util/src/queue_pair.rs +++ b/net_util/src/queue_pair.rs @@ -5,8 +5,8 @@ use std::io; use std::num::Wrapping; use std::os::unix::io::{AsRawFd, RawFd}; -use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; use rate_limiter::{RateLimiter, TokenType}; use thiserror::Error; @@ -15,7 +15,7 @@ use vm_memory::bitmap::Bitmap; use 
vm_memory::{Bytes, GuestMemory}; use vm_virtio::{AccessPlatform, Translatable}; -use super::{register_listener, unregister_listener, vnet_hdr_len, Tap}; +use super::{Tap, register_listener, unregister_listener, vnet_hdr_len}; #[derive(Clone)] pub struct TxVirtio { diff --git a/net_util/src/tap.rs b/net_util/src/tap.rs index 591dbc4912..2544b9eee7 100644 --- a/net_util/src/tap.rs +++ b/net_util/src/tap.rs @@ -15,8 +15,8 @@ use thiserror::Error; use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref, ioctl_with_val}; use super::{ - create_inet_socket, create_sockaddr, create_unix_socket, vnet_hdr_len, Error as NetUtilError, - MacAddr, + Error as NetUtilError, MacAddr, create_inet_socket, create_sockaddr, create_unix_socket, + vnet_hdr_len, }; use crate::mac::MAC_ADDR_LEN; @@ -551,7 +551,7 @@ impl AsRawFd for Tap { #[cfg(test)] mod tests { use std::net::Ipv4Addr; - use std::sync::{mpsc, LazyLock, Mutex}; + use std::sync::{LazyLock, Mutex, mpsc}; use std::time::Duration; use std::{str, thread}; @@ -860,15 +860,17 @@ mod tests { // We use a separate thread to wait for the test packet because the API exposed by pnet is // blocking. This thread will be killed when the main thread exits. - let _handle = thread::spawn(move || loop { - let buf = rx.next().unwrap(); - let p = ParsedPkt::new(buf); - p.print(); - - if let Some(ref udp) = p.udp { - if payload == udp.payload() { - channel_tx.send(true).unwrap(); - break; + let _handle = thread::spawn(move || { + loop { + let buf = rx.next().unwrap(); + let p = ParsedPkt::new(buf); + p.print(); + + if let Some(ref udp) = p.udp { + if payload == udp.payload() { + channel_tx.send(true).unwrap(); + break; + } } } }); diff --git a/pci/src/bus.rs b/pci/src/bus.rs index f6f8ce2d01..fd19321de5 100644 --- a/pci/src/bus.rs +++ b/pci/src/bus.rs @@ -13,11 +13,11 @@ use byteorder::{ByteOrder, LittleEndian}; use thiserror::Error; use vm_device::{Bus, BusDevice, BusDeviceSync}; +use crate::PciBarConfiguration; use crate::configuration::{ PciBarRegionType, PciBridgeSubclass, PciClassCode, PciConfiguration, PciHeaderType, }; use crate::device::{BarReprogrammingParams, DeviceRelocation, Error as PciDeviceError, PciDevice}; -use crate::PciBarConfiguration; const VENDOR_ID_INTEL: u16 = 0x8086; const DEVICE_ID_INTEL_VIRT_PCIE_HOST: u16 = 0x0d57; diff --git a/pci/src/device.rs b/pci/src/device.rs index cddb30fce9..3c5b3315f8 100644 --- a/pci/src/device.rs +++ b/pci/src/device.rs @@ -12,8 +12,8 @@ use thiserror::Error; use vm_allocator::{AddressAllocator, SystemAllocator}; use vm_device::Resource; -use crate::configuration::{self, PciBarRegionType}; use crate::PciBarConfiguration; +use crate::configuration::{self, PciBarRegionType}; #[derive(Error, Debug)] pub enum Error { diff --git a/pci/src/lib.rs b/pci/src/lib.rs index 438a8ce94a..c95a38b339 100644 --- a/pci/src/lib.rs +++ b/pci/src/lib.rs @@ -24,16 +24,16 @@ use serde::de::Visitor; pub use self::bus::{PciBus, PciConfigIo, PciConfigMmio, PciRoot, PciRootError}; pub use self::configuration::{ - PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciCapability, PciCapabilityId, - PciClassCode, PciConfiguration, PciExpressCapabilityId, PciHeaderType, PciMassStorageSubclass, - PciNetworkControllerSubclass, PciProgrammingInterface, PciSerialBusSubClass, PciSubclass, - PCI_CONFIGURATION_ID, + PCI_CONFIGURATION_ID, PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciCapability, + PciCapabilityId, PciClassCode, PciConfiguration, PciExpressCapabilityId, PciHeaderType, + PciMassStorageSubclass, 
PciNetworkControllerSubclass, PciProgrammingInterface, + PciSerialBusSubClass, PciSubclass, }; pub use self::device::{ BarReprogrammingParams, DeviceRelocation, Error as PciDeviceError, PciDevice, }; -pub use self::msi::{msi_num_enabled_vectors, MsiCap, MsiConfig}; -pub use self::msix::{MsixCap, MsixConfig, MsixTableEntry, MSIX_CONFIG_ID, MSIX_TABLE_ENTRY_SIZE}; +pub use self::msi::{MsiCap, MsiConfig, msi_num_enabled_vectors}; +pub use self::msix::{MSIX_CONFIG_ID, MSIX_TABLE_ENTRY_SIZE, MsixCap, MsixConfig, MsixTableEntry}; pub use self::vfio::{MmioRegion, VfioDmaMapping, VfioPciDevice, VfioPciError}; pub use self::vfio_user::{VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError}; diff --git a/pci/src/vfio.rs b/pci/src/vfio.rs index 42048b8f5f..97be4a7bc1 100644 --- a/pci/src/vfio.rs +++ b/pci/src/vfio.rs @@ -14,7 +14,7 @@ use std::sync::{Arc, Barrier, Mutex}; use anyhow::anyhow; use byteorder::{ByteOrder, LittleEndian}; use hypervisor::HypervisorVmError; -use libc::{sysconf, _SC_PAGESIZE}; +use libc::{_SC_PAGESIZE, sysconf}; use serde::{Deserialize, Serialize}; use thiserror::Error; use vfio_bindings::bindings::vfio::*; @@ -34,13 +34,13 @@ use vm_memory::{Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestUsiz use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; use vmm_sys_util::eventfd::EventFd; -use crate::msi::{MsiConfigState, MSI_CONFIG_ID}; +use crate::msi::{MSI_CONFIG_ID, MsiConfigState}; use crate::msix::MsixConfigState; use crate::{ - msi_num_enabled_vectors, BarReprogrammingParams, MsiCap, MsiConfig, MsixCap, MsixConfig, - PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciBdf, PciCapabilityId, - PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciExpressCapabilityId, - PciHeaderType, PciSubclass, MSIX_CONFIG_ID, MSIX_TABLE_ENTRY_SIZE, PCI_CONFIGURATION_ID, + BarReprogrammingParams, MSIX_CONFIG_ID, MSIX_TABLE_ENTRY_SIZE, MsiCap, MsiConfig, MsixCap, + MsixConfig, PCI_CONFIGURATION_ID, PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, + PciBdf, PciCapabilityId, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, + PciExpressCapabilityId, PciHeaderType, PciSubclass, msi_num_enabled_vectors, }; pub(crate) const VFIO_COMMON_ID: &str = "vfio_common"; @@ -1664,9 +1664,8 @@ impl VfioPciDevice { if !is_page_size_aligned(area.size) || !is_page_size_aligned(area.offset) { warn!( "Could not mmap sparse area that is not page size aligned (offset = 0x{:x}, size = 0x{:x})", - area.offset, - area.size, - ); + area.offset, area.size, + ); return Ok(()); } @@ -2040,9 +2039,9 @@ impl ExternalDmaMapping for VfioDmaMapping t as u64, Err(e) => { - return Err(io::Error::other( - format!("unable to retrieve user address for gpa 0x{gpa:x} from guest memory region: {e}") - )); + return Err(io::Error::other(format!( + "unable to retrieve user address for gpa 0x{gpa:x} from guest memory region: {e}" + ))); } } } else if self.mmio_regions.lock().unwrap().check_range(gpa, size) { diff --git a/pci/src/vfio_user.rs b/pci/src/vfio_user.rs index f23259f48f..7ca1d28814 100644 --- a/pci/src/vfio_user.rs +++ b/pci/src/vfio_user.rs @@ -24,7 +24,7 @@ use vm_memory::{ use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; use vmm_sys_util::eventfd::EventFd; -use crate::vfio::{UserMemoryRegion, Vfio, VfioCommon, VfioError, VFIO_COMMON_ID}; +use crate::vfio::{UserMemoryRegion, VFIO_COMMON_ID, Vfio, VfioCommon, VfioError}; use crate::{ BarReprogrammingParams, 
PciBarConfiguration, PciBdf, PciDevice, PciDeviceError, PciSubclass, VfioPciError, diff --git a/performance-metrics/src/main.rs b/performance-metrics/src/main.rs index c348155817..220532f6cb 100644 --- a/performance-metrics/src/main.rs +++ b/performance-metrics/src/main.rs @@ -9,8 +9,8 @@ extern crate test_infra; mod performance_tests; use std::process::Command; -use std::sync::mpsc::channel; use std::sync::Arc; +use std::sync::mpsc::channel; use std::time::Duration; use std::{env, fmt, thread}; diff --git a/performance-metrics/src/performance_tests.rs b/performance-metrics/src/performance_tests.rs index 46eb090fcc..e29dca1743 100644 --- a/performance-metrics/src/performance_tests.rs +++ b/performance-metrics/src/performance_tests.rs @@ -12,7 +12,7 @@ use std::{fs, thread}; use test_infra::{Error as InfraError, *}; use thiserror::Error; -use crate::{mean, ImageFormat, PerformanceTestControl, PerformanceTestOverrides}; +use crate::{ImageFormat, PerformanceTestControl, PerformanceTestOverrides, mean}; #[cfg(target_arch = "x86_64")] pub const FOCAL_IMAGE_NAME: &str = "focal-server-cloudimg-amd64-custom-20210609-0.raw"; diff --git a/rate_limiter/src/group.rs b/rate_limiter/src/group.rs index 51e18196a8..f883115641 100644 --- a/rate_limiter/src/group.rs +++ b/rate_limiter/src/group.rs @@ -306,7 +306,7 @@ pub(crate) mod tests { use super::RateLimiterGroupHandle; use crate::group::RateLimiterGroup; - use crate::{TokenBucket, TokenType, REFILL_TIMER_INTERVAL_MS}; + use crate::{REFILL_TIMER_INTERVAL_MS, TokenBucket, TokenType}; impl RateLimiterGroupHandle { fn bandwidth(&self) -> Option { diff --git a/rate_limiter/src/lib.rs b/rate_limiter/src/lib.rs index 9d67993386..72221416f1 100644 --- a/rate_limiter/src/lib.rs +++ b/rate_limiter/src/lib.rs @@ -48,8 +48,8 @@ extern crate log; use std::io; use std::os::unix::io::{AsRawFd, RawFd}; -use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Mutex; +use std::sync::atomic::{AtomicBool, Ordering}; use std::time::{Duration, Instant}; use thiserror::Error; @@ -470,7 +470,7 @@ impl RateLimiter { std::io::ErrorKind::WouldBlock => { return Err(Error::SpuriousRateLimiterEvent( "Rate limiter event handler called without a present timer", - )) + )); } _ => return Err(Error::TimerFdWaitError(err)), } diff --git a/serial_buffer/src/lib.rs b/serial_buffer/src/lib.rs index 6b9182d4c7..f914f2ef54 100644 --- a/serial_buffer/src/lib.rs +++ b/serial_buffer/src/lib.rs @@ -5,8 +5,8 @@ use std::collections::VecDeque; use std::io::Write; -use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; const MAX_BUFFER_SIZE: usize = 1 << 20; diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs index 904565e5cd..803ffc7ee9 100644 --- a/src/bin/ch-remote.rs +++ b/src/bin/ch-remote.rs @@ -13,8 +13,8 @@ use std::os::unix::net::UnixStream; use std::process; use api_client::{ - simple_api_command, simple_api_command_with_fds, simple_api_full_command, - Error as ApiClientError, + Error as ApiClientError, simple_api_command, simple_api_command_with_fds, + simple_api_full_command, }; use clap::{Arg, ArgAction, ArgMatches, Command}; use log::error; @@ -1128,7 +1128,9 @@ fn main() { process::exit(1); } _ => { - error!("Please either provide the api-socket option or dbus-service-name and dbus-object-path options"); + error!( + "Please either provide the api-socket option or dbus-service-name and dbus-object-path options" + ); process::exit(1); } }; diff --git a/src/main.rs b/src/main.rs index 8329100b6f..3c94f36d88 100644 --- 
a/src/main.rs +++ b/src/main.rs @@ -8,22 +8,22 @@ mod test_util; use std::fs::File; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; -use std::sync::mpsc::channel; use std::sync::Mutex; +use std::sync::mpsc::channel; use std::{env, io}; use clap::{Arg, ArgAction, ArgGroup, ArgMatches, Command}; use event_monitor::event; use libc::EFD_NONBLOCK; -use log::{error, warn, LevelFilter}; +use log::{LevelFilter, error, warn}; use option_parser::OptionParser; use seccompiler::SeccompAction; use signal_hook::consts::SIGSYS; use thiserror::Error; +use vmm::api::ApiAction; #[cfg(feature = "dbus_api")] -use vmm::api::dbus::{dbus_api_graceful_shutdown, DBusApiOptions}; +use vmm::api::dbus::{DBusApiOptions, dbus_api_graceful_shutdown}; use vmm::api::http::http_api_graceful_shutdown; -use vmm::api::ApiAction; use vmm::config::{RestoreConfig, VmParams}; use vmm::landlock::{Landlock, LandlockError}; use vmm::vm_config; diff --git a/test_infra/src/lib.rs b/test_infra/src/lib.rs index 812a3a9339..641c5a15ab 100644 --- a/test_infra/src/lib.rs +++ b/test_infra/src/lib.rs @@ -1118,10 +1118,12 @@ impl Guest { thread::sleep(std::time::Duration::new(10, 0)); // Write something to vsock from the host - assert!(exec_host_command_status(&format!( - "echo -e \"CONNECT 16\\nHelloWorld!\" | socat - UNIX-CONNECT:{socket}" - )) - .success()); + assert!( + exec_host_command_status(&format!( + "echo -e \"CONNECT 16\\nHelloWorld!\" | socat - UNIX-CONNECT:{socket}" + )) + .success() + ); // Wait for the thread to terminate. listen_socat.join().unwrap(); @@ -1134,10 +1136,11 @@ impl Guest { #[cfg(target_arch = "x86_64")] pub fn check_nvidia_gpu(&self) { - assert!(self - .ssh_command("nvidia-smi") - .unwrap() - .contains("NVIDIA L40S")); + assert!( + self.ssh_command("nvidia-smi") + .unwrap() + .contains("NVIDIA L40S") + ); } pub fn reboot_linux(&self, current_reboot_count: u32, custom_timeout: Option) { @@ -1334,11 +1337,9 @@ impl<'a> GuestCommand<'a> { if pipesize >= PIPE_SIZE && pipesize1 >= PIPE_SIZE { Ok(child) } else { - Err(std::io::Error::other( - format!( - "resizing pipe w/ 'fnctl' failed: stdout pipesize {pipesize}, stderr pipesize {pipesize1}" - ), - )) + Err(std::io::Error::other(format!( + "resizing pipe w/ 'fnctl' failed: stdout pipesize {pipesize}, stderr pipesize {pipesize1}" + ))) } } else { // The caller should call .wait() on the returned child diff --git a/tests/integration.rs b/tests/integration.rs index 255c27adf1..bdb258a22b 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -20,7 +20,7 @@ use std::path::PathBuf; use std::process::{Child, Command, Stdio}; use std::string::String; use std::sync::mpsc::Receiver; -use std::sync::{mpsc, Mutex}; +use std::sync::{Mutex, mpsc}; use std::time::Duration; use std::{fs, io, thread}; @@ -719,10 +719,12 @@ fn setup_ovs_dpdk() { assert!(exec_host_command_status("service openvswitch-switch restart").success()); // Create OVS-DPDK bridge and ports - assert!(exec_host_command_status( - "ovs-vsctl add-br ovsbr0 -- set bridge ovsbr0 datapath_type=netdev", - ) - .success()); + assert!( + exec_host_command_status( + "ovs-vsctl add-br ovsbr0 -- set bridge ovsbr0 datapath_type=netdev", + ) + .success() + ); assert!(exec_host_command_status("ovs-vsctl add-port ovsbr0 vhost-user1 -- set Interface vhost-user1 type=dpdkvhostuserclient options:vhost-server-path=/tmp/dpdkvhostclient1").success()); assert!(exec_host_command_status("ovs-vsctl add-port ovsbr0 vhost-user2 -- set Interface vhost-user2 type=dpdkvhostuserclient 
options:vhost-server-path=/tmp/dpdkvhostclient2").success()); assert!(exec_host_command_status("ip link set up dev ovsbr0").success()); @@ -1658,8 +1660,10 @@ fn _test_virtio_fs( "{{\"id\":\"myfs0\",\"bdf\":\"{pci_segment:04x}:00:01.0\"}}" ))); } else { - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"myfs0\",\"bdf\":\"0000:00:06.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"myfs0\",\"bdf\":\"0000:00:06.0\"}") + ); } thread::sleep(std::time::Duration::new(10, 0)); @@ -1739,8 +1743,10 @@ fn _test_virtio_fs( "{{\"id\":\"myfs0\",\"bdf\":\"{pci_segment:04x}:00:01.0\"}}" ))); } else { - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"myfs0\",\"bdf\":\"0000:00:06.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"myfs0\",\"bdf\":\"0000:00:06.0\"}") + ); } thread::sleep(std::time::Duration::new(10, 0)); @@ -1894,8 +1900,10 @@ fn _test_virtio_vsock(hotplug: bool) { Some(format!("cid=3,socket={socket},id=test0").as_str()), ); assert!(cmd_success); - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"test0\",\"bdf\":\"0000:00:06.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"test0\",\"bdf\":\"0000:00:06.0\"}") + ); thread::sleep(std::time::Duration::new(10, 0)); // Check adding a second one fails assert!(!remote_command( @@ -2112,18 +2120,20 @@ fn get_counters(api_socket: &str) -> Counters { fn pty_read(mut pty: std::fs::File) -> Receiver { let (tx, rx) = mpsc::channel::(); - thread::spawn(move || loop { - thread::sleep(std::time::Duration::new(1, 0)); - let mut buf = [0; 512]; - match pty.read(&mut buf) { - Ok(_bytes) => { - let output = std::str::from_utf8(&buf).unwrap().to_string(); - match tx.send(output) { - Ok(_) => (), - Err(_) => break, + thread::spawn(move || { + loop { + thread::sleep(std::time::Duration::new(1, 0)); + let mut buf = [0; 512]; + match pty.read(&mut buf) { + Ok(_bytes) => { + let output = std::str::from_utf8(&buf).unwrap().to_string(); + match tx.send(output) { + Ok(_) => (), + Err(_) => break, + } } + Err(_) => break, } - Err(_) => break, } }); rx @@ -2257,9 +2267,11 @@ fn _test_virtio_iommu(acpi: bool) { guest.wait_vm_boot(None).unwrap(); // Verify the virtio-iommu device is present. - assert!(guest - .does_device_vendor_pair_match("0x1057", "0x1af4") - .unwrap_or_default()); + assert!( + guest + .does_device_vendor_pair_match("0x1057", "0x1af4") + .unwrap_or_default() + ); // On AArch64, if the guest system boots from FDT, the behavior of IOMMU is a bit // different with ACPI. 
@@ -2319,9 +2331,11 @@ fn get_reboot_count(guest: &Guest) -> u32 { fn enable_guest_watchdog(guest: &Guest, watchdog_sec: u32) { // Check for PCI device - assert!(guest - .does_device_vendor_pair_match("0x1063", "0x1af4") - .unwrap_or_default()); + assert!( + guest + .does_device_vendor_pair_match("0x1063", "0x1af4") + .unwrap_or_default() + ); // Enable systemd watchdog guest @@ -2335,9 +2349,11 @@ fn enable_guest_watchdog(guest: &Guest, watchdog_sec: u32) { fn make_guest_panic(guest: &Guest) { // Check for pvpanic device - assert!(guest - .does_device_vendor_pair_match("0x0011", "0x1b36") - .unwrap_or_default()); + assert!( + guest + .does_device_vendor_pair_match("0x0011", "0x1b36") + .unwrap_or_default() + ); // Trigger guest a panic guest.ssh_command("screen -dmS reboot sh -c \"sleep 5; echo s | tee /proc/sysrq-trigger; echo c | sudo tee /proc/sysrq-trigger\"").unwrap(); @@ -2974,13 +2990,17 @@ mod common_parallel { ), ); assert!(cmd_success); - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"test0\",\"bdf\":\"0001:00:01.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"test0\",\"bdf\":\"0001:00:01.0\"}") + ); // Check IOMMU setup - assert!(guest - .does_device_vendor_pair_match("0x1057", "0x1af4") - .unwrap_or_default()); + assert!( + guest + .does_device_vendor_pair_match("0x1057", "0x1af4") + .unwrap_or_default() + ); assert_eq!( guest .ssh_command("ls /sys/kernel/iommu_groups/0/devices") @@ -4429,9 +4449,11 @@ mod common_parallel { let r = std::panic::catch_unwind(|| { guest.wait_vm_boot(None).unwrap(); - assert!(guest - .does_device_vendor_pair_match("0x1043", "0x1af4") - .unwrap_or_default()); + assert!( + guest + .does_device_vendor_pair_match("0x1043", "0x1af4") + .unwrap_or_default() + ); guest.ssh_command(&cmd).unwrap(); }); @@ -5407,8 +5429,10 @@ mod common_parallel { ), ); assert!(cmd_success); - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"test0\",\"bdf\":\"0000:00:06.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"test0\",\"bdf\":\"0000:00:06.0\"}") + ); thread::sleep(std::time::Duration::new(10, 0)); @@ -5454,8 +5478,10 @@ mod common_parallel { ), ); assert!(cmd_success); - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"test0\",\"bdf\":\"0000:00:06.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"test0\",\"bdf\":\"0000:00:06.0\"}") + ); thread::sleep(std::time::Duration::new(10, 0)); @@ -5953,8 +5979,10 @@ mod common_parallel { "{{\"id\":\"test0\",\"bdf\":\"{pci_segment:04x}:00:01.0\"}}" ))); } else { - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"test0\",\"bdf\":\"0000:00:06.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"test0\",\"bdf\":\"0000:00:06.0\"}") + ); } // Check that /dev/pmem0 exists and the block size is 128M @@ -6084,8 +6112,10 @@ mod common_parallel { "{{\"id\":\"test0\",\"bdf\":\"{pci_segment:04x}:00:01.0\"}}" ))); } else { - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"test0\",\"bdf\":\"0000:00:05.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"test0\",\"bdf\":\"0000:00:05.0\"}") + ); } thread::sleep(std::time::Duration::new(5, 0)); @@ -6128,8 +6158,10 @@ mod common_parallel { "{{\"id\":\"test1\",\"bdf\":\"{pci_segment:04x}:00:01.0\"}}" ))); } else { - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"test1\",\"bdf\":\"0000:00:05.0\"}")); + assert!( + 
String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"test1\",\"bdf\":\"0000:00:05.0\"}") + ); } thread::sleep(std::time::Duration::new(5, 0)); @@ -6566,15 +6598,19 @@ mod common_parallel { let phy_net = "eth0"; // Create a macvtap interface for the guest VM to use - assert!(exec_host_command_status(&format!( - "sudo ip link add link {phy_net} name {guest_macvtap_name} type macvtap mod bridge" - )) - .success()); - assert!(exec_host_command_status(&format!( - "sudo ip link set {} address {} up", - guest_macvtap_name, guest.network.guest_mac - )) - .success()); + assert!( + exec_host_command_status(&format!( + "sudo ip link add link {phy_net} name {guest_macvtap_name} type macvtap mod bridge" + )) + .success() + ); + assert!( + exec_host_command_status(&format!( + "sudo ip link set {} address {} up", + guest_macvtap_name, guest.network.guest_mac + )) + .success() + ); assert!( exec_host_command_status(&format!("sudo ip link show {guest_macvtap_name}")).success() ); @@ -6593,16 +6629,20 @@ mod common_parallel { // Create a macvtap on the same physical net interface for // the host machine to use - assert!(exec_host_command_status(&format!( - "sudo ip link add link {phy_net} name {host_macvtap_name} type macvtap mod bridge" - )) - .success()); + assert!( + exec_host_command_status(&format!( + "sudo ip link add link {phy_net} name {host_macvtap_name} type macvtap mod bridge" + )) + .success() + ); // Use default mask "255.255.255.0" - assert!(exec_host_command_status(&format!( - "sudo ip address add {}/24 dev {}", - guest.network.host_ip, host_macvtap_name - )) - .success()); + assert!( + exec_host_command_status(&format!( + "sudo ip address add {}/24 dev {}", + guest.network.host_ip, host_macvtap_name + )) + .success() + ); assert!( exec_host_command_status(&format!("sudo ip link set dev {host_macvtap_name} up")) .success() @@ -6638,11 +6678,15 @@ mod common_parallel { remote_command_w_output(&api_socket, "add-net", Some(&net_params)); assert!(cmd_success); #[cfg(target_arch = "x86_64")] - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"_net2\",\"bdf\":\"0000:00:05.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"_net2\",\"bdf\":\"0000:00:05.0\"}") + ); #[cfg(target_arch = "aarch64")] - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"_net0\",\"bdf\":\"0000:00:05.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"_net0\",\"bdf\":\"0000:00:05.0\"}") + ); } // The functional connectivity provided by the virtio-net device @@ -6818,21 +6862,27 @@ mod common_parallel { fn setup_spdk_nvme(nvme_dir: &std::path::Path) -> Child { cleanup_spdk_nvme(); - assert!(exec_host_command_status(&format!( - "mkdir -p {}", - nvme_dir.join("nvme-vfio-user").to_str().unwrap() - )) - .success()); - assert!(exec_host_command_status(&format!( - "truncate {} -s 128M", - nvme_dir.join("test-disk.raw").to_str().unwrap() - )) - .success()); - assert!(exec_host_command_status(&format!( - "mkfs.ext4 {}", - nvme_dir.join("test-disk.raw").to_str().unwrap() - )) - .success()); + assert!( + exec_host_command_status(&format!( + "mkdir -p {}", + nvme_dir.join("nvme-vfio-user").to_str().unwrap() + )) + .success() + ); + assert!( + exec_host_command_status(&format!( + "truncate {} -s 128M", + nvme_dir.join("test-disk.raw").to_str().unwrap() + )) + .success() + ); + assert!( + exec_host_command_status(&format!( + "mkfs.ext4 {}", + nvme_dir.join("test-disk.raw").to_str().unwrap() + )) + .success() + ); // Start the SPDK 
nvmf_tgt daemon to present NVMe device as a VFIO user device let child = Command::new("/usr/local/bin/spdk-nvme/nvmf_tgt") @@ -6846,11 +6896,13 @@ mod common_parallel { 3, std::time::Duration::new(5, 0), )); - assert!(exec_host_command_status(&format!( - "/usr/local/bin/spdk-nvme/rpc.py bdev_aio_create {} test 512", - nvme_dir.join("test-disk.raw").to_str().unwrap() - )) - .success()); + assert!( + exec_host_command_status(&format!( + "/usr/local/bin/spdk-nvme/rpc.py bdev_aio_create {} test 512", + nvme_dir.join("test-disk.raw").to_str().unwrap() + )) + .success() + ); assert!(exec_host_command_status( "/usr/local/bin/spdk-nvme/rpc.py nvmf_create_subsystem nqn.2019-07.io.spdk:cnode -a -s test" ) @@ -6911,8 +6963,10 @@ mod common_parallel { )), ); assert!(cmd_success); - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"vfio_user0\",\"bdf\":\"0000:00:05.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"vfio_user0\",\"bdf\":\"0000:00:05.0\"}") + ); thread::sleep(std::time::Duration::new(10, 0)); @@ -7017,15 +7071,19 @@ mod common_parallel { Some("id=myvdpa0,path=/dev/vhost-vdpa-1,num_queues=1,pci_segment=1,iommu=on"), ); assert!(cmd_success); - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"myvdpa0\",\"bdf\":\"0001:00:01.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"myvdpa0\",\"bdf\":\"0001:00:01.0\"}") + ); thread::sleep(std::time::Duration::new(10, 0)); // Check IOMMU setup - assert!(guest - .does_device_vendor_pair_match("0x1057", "0x1af4") - .unwrap_or_default()); + assert!( + guest + .does_device_vendor_pair_match("0x1057", "0x1af4") + .unwrap_or_default() + ); assert_eq!( guest .ssh_command("ls /sys/kernel/iommu_groups/0/devices") @@ -7419,7 +7477,7 @@ mod ivshmem { use std::fs::remove_dir_all; use std::process::Command; - use test_infra::{handle_child_output, kill_child, Guest, GuestCommand, UbuntuDiskConfig}; + use test_infra::{Guest, GuestCommand, UbuntuDiskConfig, handle_child_output, kill_child}; use crate::*; @@ -9404,8 +9462,10 @@ mod windows { Some(format!("path={disk},readonly=off").as_str()), ); assert!(cmd_success); - assert!(String::from_utf8_lossy(&cmd_output) - .contains(format!("\"id\":\"{disk_id}\"").as_str())); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains(format!("\"id\":\"{disk_id}\"").as_str()) + ); thread::sleep(std::time::Duration::new(5, 0)); // Online disk devices windows_guest.disks_set_rw(); @@ -9615,8 +9675,10 @@ mod vfio { Some(format!("id=vfio0,path={NVIDIA_VFIO_DEVICE}").as_str()), ); assert!(cmd_success); - assert!(String::from_utf8_lossy(&cmd_output) - .contains("{\"id\":\"vfio0\",\"bdf\":\"0000:00:06.0\"}")); + assert!( + String::from_utf8_lossy(&cmd_output) + .contains("{\"id\":\"vfio0\",\"bdf\":\"0000:00:06.0\"}") + ); thread::sleep(std::time::Duration::new(10, 0)); @@ -9694,10 +9756,12 @@ mod vfio { let r = std::panic::catch_unwind(|| { guest.wait_vm_boot(None).unwrap(); - assert!(guest - .ssh_command("sudo dmesg") - .unwrap() - .contains("input address: 42 bits")); + assert!( + guest + .ssh_command("sudo dmesg") + .unwrap() + .contains("input address: 42 bits") + ); }); let _ = child.kill(); @@ -9818,13 +9882,13 @@ mod live_migration { let _ = dest_vm.kill(); let dest_output = dest_vm.wait_with_output().unwrap(); eprintln!( - "\n\n==== Start 'destination_vm' stdout ====\n\n{}\n\n==== End 'destination_vm' stdout ====", - String::from_utf8_lossy(&dest_output.stdout) - ); + "\n\n==== Start 'destination_vm' stdout ====\n\n{}\n\n==== 
End 'destination_vm' stdout ====", + String::from_utf8_lossy(&dest_output.stdout) + ); eprintln!( - "\n\n==== Start 'destination_vm' stderr ====\n\n{}\n\n==== End 'destination_vm' stderr ====", - String::from_utf8_lossy(&dest_output.stderr) - ); + "\n\n==== Start 'destination_vm' stderr ====\n\n{}\n\n==== End 'destination_vm' stderr ====", + String::from_utf8_lossy(&dest_output.stderr) + ); if let Some(ovs_vm) = ovs_vm { let mut ovs_vm = ovs_vm; @@ -11382,11 +11446,13 @@ mod rate_limiter { String::from(test_img_dir.as_path().join("blk.img").to_str().unwrap()); // Create the test block image - assert!(exec_host_command_output(&format!( - "dd if=/dev/zero of={blk_rate_limiter_test_img} bs=1M count=1024" - )) - .status - .success()); + assert!( + exec_host_command_output(&format!( + "dd if=/dev/zero of={blk_rate_limiter_test_img} bs=1M count=1024" + )) + .status + .success() + ); let test_blk_params = if bandwidth { format!( @@ -11491,11 +11557,13 @@ mod rate_limiter { .unwrap(), ); - assert!(exec_host_command_output(&format!( - "dd if=/dev/zero of={test_img_path} bs=1M count=1024" - )) - .status - .success()); + assert!( + exec_host_command_output(&format!( + "dd if=/dev/zero of={test_img_path} bs=1M count=1024" + )) + .status + .success() + ); disk_args.push(format!( "path={test_img_path},num_queues={num_queues},rate_limit_group=group0" diff --git a/vhost_user_block/src/lib.rs b/vhost_user_block/src/lib.rs index 5e668c0c5a..3977a25ab5 100644 --- a/vhost_user_block/src/lib.rs +++ b/vhost_user_block/src/lib.rs @@ -19,13 +19,13 @@ use std::time::Instant; use std::{convert, io, process, result}; use block::qcow::{self, ImageType, QcowFile}; -use block::{build_serial, Request, VirtioBlockConfig}; +use block::{Request, VirtioBlockConfig, build_serial}; use libc::EFD_NONBLOCK; use log::*; use option_parser::{OptionParser, OptionParserError, Toggle}; use thiserror::Error; -use vhost::vhost_user::message::*; use vhost::vhost_user::Listener; +use vhost::vhost_user::message::*; use vhost_user_backend::bitmap::BitmapMmapRegion; use vhost_user_backend::{VhostUserBackendMut, VhostUserDaemon, VringRwLock, VringState, VringT}; use virtio_bindings::virtio_blk::*; diff --git a/vhost_user_net/src/lib.rs b/vhost_user_net/src/lib.rs index de2a416aa0..b5c0aa8019 100644 --- a/vhost_user_net/src/lib.rs +++ b/vhost_user_net/src/lib.rs @@ -15,12 +15,12 @@ use std::{io, process}; use libc::EFD_NONBLOCK; use log::*; use net_util::{ - open_tap, MacAddr, NetCounters, NetQueuePair, OpenTapError, RxVirtio, Tap, TxVirtio, + MacAddr, NetCounters, NetQueuePair, OpenTapError, RxVirtio, Tap, TxVirtio, open_tap, }; use option_parser::{OptionParser, OptionParserError, Toggle}; use thiserror::Error; -use vhost::vhost_user::message::*; use vhost::vhost_user::Listener; +use vhost::vhost_user::message::*; use vhost_user_backend::bitmap::BitmapMmapRegion; use vhost_user_backend::{VhostUserBackendMut, VhostUserDaemon, VringRwLock, VringT}; use virtio_bindings::virtio_config::{VIRTIO_F_NOTIFY_ON_EMPTY, VIRTIO_F_VERSION_1}; diff --git a/virtio-devices/src/balloon.rs b/virtio-devices/src/balloon.rs index 2a34b6688f..1a5e202fdf 100644 --- a/virtio-devices/src/balloon.rs +++ b/virtio-devices/src/balloon.rs @@ -37,9 +37,9 @@ use vmm_sys_util::eventfd::EventFd; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; use crate::{ - ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, GuestMemoryMmap, - VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioInterruptType, - 
EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1, + ActivateResult, EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, EpollHelperHandler, + GuestMemoryMmap, VIRTIO_F_VERSION_1, VirtioCommon, VirtioDevice, VirtioDeviceType, + VirtioInterrupt, VirtioInterruptType, }; const QUEUE_SIZE: u16 = 128; @@ -575,12 +575,12 @@ impl VirtioDevice for Balloon { let data_len = data.len() as u64; if offset + data_len > config_len { error!( - "Out-of-bound access to configuration: config_len = {} offset = {:x} length = {} for {}", - config_len, - offset, - data_len, - self.device_type() - ); + "Out-of-bound access to configuration: config_len = {} offset = {:x} length = {} for {}", + config_len, + offset, + data_len, + self.device_type() + ); return; } diff --git a/virtio-devices/src/block.rs b/virtio-devices/src/block.rs index 415ae3f365..dad4fd2158 100644 --- a/virtio-devices/src/block.rs +++ b/virtio-devices/src/block.rs @@ -19,12 +19,12 @@ use std::{io, result}; use anyhow::anyhow; use block::async_io::{AsyncIo, AsyncIoError, DiskFile}; -use block::fcntl::{get_lock_state, LockError, LockType}; +use block::fcntl::{LockError, LockType, get_lock_state}; use block::{ - build_serial, fcntl, ExecuteAsync, ExecuteError, Request, RequestType, VirtioBlockConfig, + ExecuteAsync, ExecuteError, Request, RequestType, VirtioBlockConfig, build_serial, fcntl, }; -use rate_limiter::group::{RateLimiterGroup, RateLimiterGroupHandle}; use rate_limiter::TokenType; +use rate_limiter::group::{RateLimiterGroup, RateLimiterGroupHandle}; use seccompiler::SeccompAction; use serde::{Deserialize, Serialize}; use thiserror::Error; @@ -38,9 +38,9 @@ use vm_virtio::AccessPlatform; use vmm_sys_util::eventfd::EventFd; use super::{ - ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, - Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterruptType, - EPOLL_HELPER_EVENT_LAST, + ActivateError, ActivateResult, EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, + EpollHelperHandler, Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, + VirtioInterruptType, }; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; diff --git a/virtio-devices/src/console.rs b/virtio-devices/src/console.rs index f05c0c4f4d..2c0e250a4d 100644 --- a/virtio-devices/src/console.rs +++ b/virtio-devices/src/console.rs @@ -22,9 +22,9 @@ use vm_virtio::{AccessPlatform, Translatable}; use vmm_sys_util::eventfd::EventFd; use super::{ - ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, Error as DeviceError, - VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterruptType, EPOLL_HELPER_EVENT_LAST, - VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1, + ActivateResult, EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, EpollHelperHandler, + Error as DeviceError, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1, VirtioCommon, VirtioDevice, + VirtioDeviceType, VirtioInterruptType, }; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; diff --git a/virtio-devices/src/iommu.rs b/virtio-devices/src/iommu.rs index df310823af..71c10c1d0c 100644 --- a/virtio-devices/src/iommu.rs +++ b/virtio-devices/src/iommu.rs @@ -24,8 +24,8 @@ use vm_virtio::AccessPlatform; use vmm_sys_util::eventfd::EventFd; use super::{ - ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, Error as DeviceError, - VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1, + ActivateResult, EPOLL_HELPER_EVENT_LAST, 
EpollHelper, EpollHelperError, EpollHelperHandler, + Error as DeviceError, VIRTIO_F_VERSION_1, VirtioCommon, VirtioDevice, VirtioDeviceType, }; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; diff --git a/virtio-devices/src/lib.rs b/virtio-devices/src/lib.rs index a59661eb61..86359da657 100644 --- a/virtio-devices/src/lib.rs +++ b/virtio-devices/src/lib.rs @@ -51,10 +51,10 @@ pub use self::device::{ VirtioSharedMemoryList, }; pub use self::epoll_helper::{ - EpollHelper, EpollHelperError, EpollHelperHandler, EPOLL_HELPER_EVENT_LAST, + EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, EpollHelperHandler, }; pub use self::iommu::{AccessPlatformMapping, Iommu, IommuMapping}; -pub use self::mem::{BlocksState, Mem, VirtioMemMappingSource, VIRTIO_MEM_ALIGN_SIZE}; +pub use self::mem::{BlocksState, Mem, VIRTIO_MEM_ALIGN_SIZE, VirtioMemMappingSource}; pub use self::net::{Net, NetCtrlEpollHandler}; pub use self::pmem::Pmem; pub use self::rng::Rng; diff --git a/virtio-devices/src/mem.rs b/virtio-devices/src/mem.rs index 8d5830ac3c..6971f07151 100644 --- a/virtio-devices/src/mem.rs +++ b/virtio-devices/src/mem.rs @@ -18,7 +18,7 @@ use std::collections::BTreeMap; use std::mem::size_of; use std::os::unix::io::{AsRawFd, RawFd}; use std::sync::atomic::AtomicBool; -use std::sync::{mpsc, Arc, Barrier, Mutex}; +use std::sync::{Arc, Barrier, Mutex, mpsc}; use std::{io, result}; use anyhow::anyhow; @@ -36,9 +36,9 @@ use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottabl use vmm_sys_util::eventfd::EventFd; use super::{ - ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, - Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, - VIRTIO_F_VERSION_1, + ActivateError, ActivateResult, EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, + EpollHelperHandler, Error as DeviceError, VIRTIO_F_VERSION_1, VirtioCommon, VirtioDevice, + VirtioDeviceType, }; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; diff --git a/virtio-devices/src/net.rs b/virtio-devices/src/net.rs index 950cedb519..4518321ecf 100644 --- a/virtio-devices/src/net.rs +++ b/virtio-devices/src/net.rs @@ -18,8 +18,8 @@ use anyhow::anyhow; #[cfg(not(fuzzing))] use net_util::virtio_features_to_tap_offload; use net_util::{ - build_net_config_space, build_net_config_space_with_mq, open_tap, CtrlQueue, MacAddr, - NetCounters, NetQueuePair, OpenTapError, RxVirtio, Tap, TapError, TxVirtio, VirtioNetConfig, + CtrlQueue, MacAddr, NetCounters, NetQueuePair, OpenTapError, RxVirtio, Tap, TapError, TxVirtio, + VirtioNetConfig, build_net_config_space, build_net_config_space_with_mq, open_tap, }; use seccompiler::SeccompAction; use serde::{Deserialize, Serialize}; @@ -34,9 +34,9 @@ use vm_virtio::AccessPlatform; use vmm_sys_util::eventfd::EventFd; use super::{ - ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, - Error as DeviceError, RateLimiterConfig, VirtioCommon, VirtioDevice, VirtioDeviceType, - VirtioInterruptType, EPOLL_HELPER_EVENT_LAST, + ActivateError, ActivateResult, EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, + EpollHelperHandler, Error as DeviceError, RateLimiterConfig, VirtioCommon, VirtioDevice, + VirtioDeviceType, VirtioInterruptType, }; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; diff --git a/virtio-devices/src/pmem.rs b/virtio-devices/src/pmem.rs index 1e4353477c..8e41f0ec05 100644 --- 
a/virtio-devices/src/pmem.rs +++ b/virtio-devices/src/pmem.rs @@ -28,9 +28,9 @@ use vm_virtio::{AccessPlatform, Translatable}; use vmm_sys_util::eventfd::EventFd; use super::{ - ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, - Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, - VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1, + ActivateError, ActivateResult, EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, + EpollHelperHandler, Error as DeviceError, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1, + VirtioCommon, VirtioDevice, VirtioDeviceType, }; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; diff --git a/virtio-devices/src/rng.rs b/virtio-devices/src/rng.rs index 8429e3b31e..6dccb2de19 100644 --- a/virtio-devices/src/rng.rs +++ b/virtio-devices/src/rng.rs @@ -21,9 +21,9 @@ use vm_virtio::{AccessPlatform, Translatable}; use vmm_sys_util::eventfd::EventFd; use super::{ - ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, - Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, - VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1, + ActivateError, ActivateResult, EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, + EpollHelperHandler, Error as DeviceError, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1, + VirtioCommon, VirtioDevice, VirtioDeviceType, }; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; diff --git a/virtio-devices/src/seccomp_filters.rs b/virtio-devices/src/seccomp_filters.rs index abad842bb1..5986e72ea3 100644 --- a/virtio-devices/src/seccomp_filters.rs +++ b/virtio-devices/src/seccomp_filters.rs @@ -53,13 +53,15 @@ const VFIO_IOMMU_UNMAP_DMA: u64 = 0x3b72; #[cfg(feature = "sev_snp")] fn mshv_sev_snp_ioctl_seccomp_rule() -> SeccompRule { - and![Cond::new( - 1, - ArgLen::Dword, - Eq, - mshv_ioctls::MSHV_MODIFY_GPA_HOST_ACCESS() - ) - .unwrap()] + and![ + Cond::new( + 1, + ArgLen::Dword, + Eq, + mshv_ioctls::MSHV_MODIFY_GPA_HOST_ACCESS() + ) + .unwrap() + ] } #[cfg(feature = "sev_snp")] diff --git a/virtio-devices/src/thread_helper.rs b/virtio-devices/src/thread_helper.rs index 41eb99b7c7..d4df6c86b6 100644 --- a/virtio-devices/src/thread_helper.rs +++ b/virtio-devices/src/thread_helper.rs @@ -6,12 +6,12 @@ use std::panic::AssertUnwindSafe; use std::thread::{self, JoinHandle}; -use seccompiler::{apply_filter, SeccompAction}; +use seccompiler::{SeccompAction, apply_filter}; use vmm_sys_util::eventfd::EventFd; -use crate::epoll_helper::EpollHelperError; -use crate::seccomp_filters::{get_seccomp_filter, Thread}; use crate::ActivateError; +use crate::epoll_helper::EpollHelperError; +use crate::seccomp_filters::{Thread, get_seccomp_filter}; pub(crate) fn spawn_virtio_thread( name: &str, diff --git a/virtio-devices/src/transport/mod.rs b/virtio-devices/src/transport/mod.rs index fae6f166b8..9214de5dbe 100644 --- a/virtio-devices/src/transport/mod.rs +++ b/virtio-devices/src/transport/mod.rs @@ -5,7 +5,7 @@ use vmm_sys_util::eventfd::EventFd; mod pci_common_config; mod pci_device; -pub use pci_common_config::{VirtioPciCommonConfig, VIRTIO_PCI_COMMON_CONFIG_ID}; +pub use pci_common_config::{VIRTIO_PCI_COMMON_CONFIG_ID, VirtioPciCommonConfig}; pub use pci_device::{VirtioPciDevice, VirtioPciDeviceActivator, VirtioPciDeviceError}; pub trait VirtioTransport { diff --git a/virtio-devices/src/transport/pci_device.rs b/virtio-devices/src/transport/pci_device.rs index 
f493e32ab0..a16102218e 100644 --- a/virtio-devices/src/transport/pci_device.rs +++ b/virtio-devices/src/transport/pci_device.rs @@ -35,11 +35,11 @@ use vm_virtio::AccessPlatform; use vmm_sys_util::eventfd::EventFd; use super::pci_common_config::VirtioPciCommonConfigState; -use crate::transport::{VirtioPciCommonConfig, VirtioTransport, VIRTIO_PCI_COMMON_CONFIG_ID}; +use crate::transport::{VIRTIO_PCI_COMMON_CONFIG_ID, VirtioPciCommonConfig, VirtioTransport}; use crate::{ - ActivateResult, GuestMemoryMmap, VirtioDevice, VirtioDeviceType, VirtioInterrupt, - VirtioInterruptType, DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FAILED, - DEVICE_FEATURES_OK, DEVICE_INIT, + ActivateResult, DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FAILED, + DEVICE_FEATURES_OK, DEVICE_INIT, GuestMemoryMmap, VirtioDevice, VirtioDeviceType, + VirtioInterrupt, VirtioInterruptType, }; /// Vector value used to disable MSI for a queue. @@ -973,7 +973,7 @@ impl PciDevice for VirtioPciDevice { settings_bar_addr = Some(GuestAddress(base)); use_64bit_bar = match type_ { PciBarType::Io => { - return Err(PciDeviceError::InvalidResource(resource)) + return Err(PciDeviceError::InvalidResource(resource)); } PciBarType::Mmio32 => false, PciBarType::Mmio64 => true, diff --git a/virtio-devices/src/vdpa.rs b/virtio-devices/src/vdpa.rs index decd6ede64..6ca4f7ddaa 100644 --- a/virtio-devices/src/vdpa.rs +++ b/virtio-devices/src/vdpa.rs @@ -12,9 +12,9 @@ use anyhow::anyhow; use serde::{Deserialize, Serialize}; use thiserror::Error; use vhost::vdpa::{VhostVdpa, VhostVdpaIovaRange}; +use vhost::vhost_kern::VhostKernFeatures; use vhost::vhost_kern::vdpa::VhostKernVdpa; use vhost::vhost_kern::vhost_binding::VHOST_BACKEND_F_SUSPEND; -use vhost::vhost_kern::VhostKernFeatures; use vhost::{VhostBackend, VringConfigData}; use virtio_queue::desc::RawDescriptor; use virtio_queue::{Queue, QueueT}; @@ -25,9 +25,9 @@ use vm_virtio::{AccessPlatform, Translatable}; use vmm_sys_util::eventfd::EventFd; use crate::{ - ActivateError, ActivateResult, GuestMemoryMmap, VirtioCommon, VirtioDevice, VirtioInterrupt, - VirtioInterruptType, DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FEATURES_OK, - VIRTIO_F_IOMMU_PLATFORM, + ActivateError, ActivateResult, DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, + DEVICE_FEATURES_OK, GuestMemoryMmap, VIRTIO_F_IOMMU_PLATFORM, VirtioCommon, VirtioDevice, + VirtioInterrupt, VirtioInterruptType, }; #[derive(Error, Debug)] diff --git a/virtio-devices/src/vhost_user/blk.rs b/virtio-devices/src/vhost_user/blk.rs index 87ca4130a8..d009280a86 100644 --- a/virtio-devices/src/vhost_user/blk.rs +++ b/virtio-devices/src/vhost_user/blk.rs @@ -9,8 +9,8 @@ use block::VirtioBlockConfig; use seccompiler::SeccompAction; use serde::{Deserialize, Serialize}; use vhost::vhost_user::message::{ - VhostUserConfigFlags, VhostUserProtocolFeatures, VhostUserVirtioFeatures, - VHOST_USER_CONFIG_OFFSET, + VHOST_USER_CONFIG_OFFSET, VhostUserConfigFlags, VhostUserProtocolFeatures, + VhostUserVirtioFeatures, }; use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler}; use virtio_bindings::virtio_blk::{ @@ -26,11 +26,11 @@ use vmm_sys_util::eventfd::EventFd; use super::super::{ActivateResult, VirtioCommon, VirtioDevice, VirtioDeviceType}; use super::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; -use super::{Error, Result, DEFAULT_VIRTIO_FEATURES}; +use super::{DEFAULT_VIRTIO_FEATURES, Error, Result}; use crate::seccomp_filters::Thread; use 
crate::thread_helper::spawn_virtio_thread; use crate::vhost_user::VhostUserCommon; -use crate::{GuestMemoryMmap, GuestRegionMmap, VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM}; +use crate::{GuestMemoryMmap, GuestRegionMmap, VIRTIO_F_IOMMU_PLATFORM, VirtioInterrupt}; const DEFAULT_QUEUE_NUMBER: usize = 1; @@ -134,8 +134,10 @@ impl Blk { }; if num_queues > backend_num_queues { - error!("vhost-user-blk requested too many queues ({}) since the backend only supports {}\n", - num_queues, backend_num_queues); + error!( + "vhost-user-blk requested too many queues ({}) since the backend only supports {}\n", + num_queues, backend_num_queues + ); return Err(Error::BadQueueNum); } diff --git a/virtio-devices/src/vhost_user/fs.rs b/virtio-devices/src/vhost_user/fs.rs index 4aa6920475..4c70db2f1a 100644 --- a/virtio-devices/src/vhost_user/fs.rs +++ b/virtio-devices/src/vhost_user/fs.rs @@ -7,7 +7,7 @@ use std::{result, thread}; use seccompiler::SeccompAction; use serde::{Deserialize, Serialize}; -use serde_with::{serde_as, Bytes}; +use serde_with::{Bytes, serde_as}; use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}; use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler}; use virtio_queue::Queue; @@ -18,13 +18,13 @@ use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottabl use vmm_sys_util::eventfd::EventFd; use super::vu_common_ctrl::VhostUserHandle; -use super::{Error, Result, DEFAULT_VIRTIO_FEATURES}; +use super::{DEFAULT_VIRTIO_FEATURES, Error, Result}; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; use crate::vhost_user::VhostUserCommon; use crate::{ - ActivateResult, GuestMemoryMmap, GuestRegionMmap, MmapRegion, VirtioCommon, VirtioDevice, - VirtioDeviceType, VirtioInterrupt, VirtioSharedMemoryList, VIRTIO_F_IOMMU_PLATFORM, + ActivateResult, GuestMemoryMmap, GuestRegionMmap, MmapRegion, VIRTIO_F_IOMMU_PLATFORM, + VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioSharedMemoryList, }; const NUM_QUEUE_OFFSET: usize = 1; @@ -148,9 +148,9 @@ impl Fs { if num_queues > backend_num_queues { error!( - "vhost-user-fs requested too many queues ({}) since the backend only supports {}\n", - num_queues, backend_num_queues - ); + "vhost-user-fs requested too many queues ({}) since the backend only supports {}\n", + num_queues, backend_num_queues + ); return Err(Error::BadQueueNum); } diff --git a/virtio-devices/src/vhost_user/mod.rs b/virtio-devices/src/vhost_user/mod.rs index 188942721f..14aa173b78 100644 --- a/virtio-devices/src/vhost_user/mod.rs +++ b/virtio-devices/src/vhost_user/mod.rs @@ -10,11 +10,11 @@ use std::sync::{Arc, Barrier, Mutex}; use anyhow::anyhow; use serde::{Deserialize, Serialize}; use thiserror::Error; +use vhost::Error as VhostError; use vhost::vhost_user::message::{ VhostUserInflight, VhostUserProtocolFeatures, VhostUserVirtioFeatures, }; use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontendReqHandler}; -use vhost::Error as VhostError; use virtio_queue::{Error as QueueError, Queue}; use vm_memory::mmap::MmapRegionError; use vm_memory::{Address, Error as MmapError, GuestAddressSpace, GuestMemory, GuestMemoryAtomic}; @@ -24,10 +24,10 @@ use vmm_sys_util::eventfd::EventFd; use vu_common_ctrl::VhostUserHandle; use crate::{ - ActivateError, EpollHelper, EpollHelperError, EpollHelperHandler, GuestMemoryMmap, - GuestRegionMmap, VirtioInterrupt, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_IN_ORDER, - VIRTIO_F_NOTIFICATION_DATA, VIRTIO_F_ORDER_PLATFORM, 
VIRTIO_F_RING_EVENT_IDX, - VIRTIO_F_RING_INDIRECT_DESC, VIRTIO_F_VERSION_1, + ActivateError, EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, EpollHelperHandler, + GuestMemoryMmap, GuestRegionMmap, VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, + VIRTIO_F_ORDER_PLATFORM, VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_RING_INDIRECT_DESC, + VIRTIO_F_VERSION_1, VirtioInterrupt, }; pub mod blk; diff --git a/virtio-devices/src/vhost_user/net.rs b/virtio-devices/src/vhost_user/net.rs index c52d5ca385..99c0f81555 100644 --- a/virtio-devices/src/vhost_user/net.rs +++ b/virtio-devices/src/vhost_user/net.rs @@ -5,7 +5,7 @@ use std::sync::atomic::AtomicBool; use std::sync::{Arc, Barrier, Mutex}; use std::{result, thread}; -use net_util::{build_net_config_space, CtrlQueue, MacAddr, VirtioNetConfig}; +use net_util::{CtrlQueue, MacAddr, VirtioNetConfig, build_net_config_space}; use seccompiler::SeccompAction; use serde::{Deserialize, Serialize}; use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}; @@ -28,9 +28,9 @@ use crate::thread_helper::spawn_virtio_thread; use crate::vhost_user::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; use crate::vhost_user::{Error, Result, VhostUserCommon}; use crate::{ - ActivateResult, GuestMemoryMmap, GuestRegionMmap, NetCtrlEpollHandler, VirtioCommon, - VirtioDevice, VirtioDeviceType, VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM, - VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1, + ActivateResult, GuestMemoryMmap, GuestRegionMmap, NetCtrlEpollHandler, VIRTIO_F_IOMMU_PLATFORM, + VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1, VirtioCommon, VirtioDevice, VirtioDeviceType, + VirtioInterrupt, }; const DEFAULT_QUEUE_NUMBER: usize = 2; @@ -168,8 +168,10 @@ impl Net { }; if num_queues > backend_num_queues { - error!("vhost-user-net requested too many queues ({}) since the backend only supports {}\n", - num_queues, backend_num_queues); + error!( + "vhost-user-net requested too many queues ({}) since the backend only supports {}\n", + num_queues, backend_num_queues + ); return Err(Error::BadQueueNum); } diff --git a/virtio-devices/src/vhost_user/vu_common_ctrl.rs b/virtio-devices/src/vhost_user/vu_common_ctrl.rs index e1204fbd7d..91fb55a207 100644 --- a/virtio-devices/src/vhost_user/vu_common_ctrl.rs +++ b/virtio-devices/src/vhost_user/vu_common_ctrl.rs @@ -5,8 +5,8 @@ use std::ffi; use std::fs::File; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::os::unix::net::UnixListener; -use std::sync::atomic::Ordering; use std::sync::Arc; +use std::sync::atomic::Ordering; use std::thread::sleep; use std::time::{Duration, Instant}; @@ -29,8 +29,8 @@ use vmm_sys_util::eventfd::EventFd; use super::{Error, Result}; use crate::vhost_user::Inflight; use crate::{ - get_host_address_range, GuestMemoryMmap, GuestRegionMmap, MmapRegion, VirtioInterrupt, - VirtioInterruptType, + GuestMemoryMmap, GuestRegionMmap, MmapRegion, VirtioInterrupt, VirtioInterruptType, + get_host_address_range, }; // Size of a dirty page for vhost-user. 
@@ -399,7 +399,7 @@ impl VhostUserHandle { acked_features: 0, vrings_info: None, queue_indexes: Vec::new(), - }) + }); } Err(e) => e, }; diff --git a/virtio-devices/src/vsock/csm/connection.rs b/virtio-devices/src/vsock/csm/connection.rs index 50f3c30341..e36f58ccb7 100644 --- a/virtio-devices/src/vsock/csm/connection.rs +++ b/virtio-devices/src/vsock/csm/connection.rs @@ -89,7 +89,7 @@ use super::super::defs::uapi; use super::super::packet::VsockPacket; use super::super::{Result as VsockResult, VsockChannel, VsockEpollListener, VsockError}; use super::txbuf::TxBuf; -use super::{defs, ConnState, Error, PendingRx, PendingRxSet, Result}; +use super::{ConnState, Error, PendingRx, PendingRxSet, Result, defs}; /// A self-managing connection object, that handles communication between a guest-side AF_VSOCK /// socket and a host-side `Read + Write + AsRawFd` stream. @@ -1158,10 +1158,11 @@ mod tests { // When there's data in the TX buffer, the connection should ask to be notified when it // can write to its backing stream. - assert!(ctx - .conn - .get_polled_evset() - .contains(epoll::Events::EPOLLOUT)); + assert!( + ctx.conn + .get_polled_evset() + .contains(epoll::Events::EPOLLOUT) + ); assert_eq!(ctx.conn.tx_buf.len(), data.len()); // Unlock the write stream and notify the connection it can now write its buffered @@ -1212,10 +1213,11 @@ mod tests { stream.write_state = StreamState::Closed; ctx.set_stream(stream); - assert!(ctx - .conn - .get_polled_evset() - .contains(epoll::Events::EPOLLOUT)); + assert!( + ctx.conn + .get_polled_evset() + .contains(epoll::Events::EPOLLOUT) + ); ctx.notify_epollout(); assert_eq!(ctx.conn.state, ConnState::Killed); } diff --git a/virtio-devices/src/vsock/csm/txbuf.rs b/virtio-devices/src/vsock/csm/txbuf.rs index 4c16913f34..1a8c5dd151 100644 --- a/virtio-devices/src/vsock/csm/txbuf.rs +++ b/virtio-devices/src/vsock/csm/txbuf.rs @@ -5,7 +5,7 @@ use std::io::Write; use std::num::Wrapping; -use super::{defs, Error, Result}; +use super::{Error, Result, defs}; /// A simple ring-buffer implementation, used by vsock connections to buffer TX (guest -> host) /// data. 
Memory for this buffer is allocated lazily, since buffering will only be needed when diff --git a/virtio-devices/src/vsock/device.rs b/virtio-devices/src/vsock/device.rs index f8c024833b..4d073c4923 100644 --- a/virtio-devices/src/vsock/device.rs +++ b/virtio-devices/src/vsock/device.rs @@ -47,10 +47,10 @@ use super::{VsockBackend, VsockPacket}; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; use crate::{ - ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, Error as DeviceError, - GuestMemoryMmap, VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterrupt, - VirtioInterruptType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_IN_ORDER, VIRTIO_F_IOMMU_PLATFORM, - VIRTIO_F_VERSION_1, + ActivateResult, EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, EpollHelperHandler, + Error as DeviceError, GuestMemoryMmap, VIRTIO_F_IN_ORDER, VIRTIO_F_IOMMU_PLATFORM, + VIRTIO_F_VERSION_1, VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterrupt, + VirtioInterruptType, }; const QUEUE_SIZE: u16 = 256; diff --git a/virtio-devices/src/vsock/mod.rs b/virtio-devices/src/vsock/mod.rs index ecd523d1c0..3cd9592a64 100644 --- a/virtio-devices/src/vsock/mod.rs +++ b/virtio-devices/src/vsock/mod.rs @@ -172,7 +172,7 @@ pub mod tests { use vm_virtio::queue::testing::VirtQueue as GuestQ; use vmm_sys_util::eventfd::EventFd; - use super::device::{VsockEpollHandler, RX_QUEUE_EVENT, TX_QUEUE_EVENT}; + use super::device::{RX_QUEUE_EVENT, TX_QUEUE_EVENT, VsockEpollHandler}; use super::packet::VSOCK_PKT_HDR_SIZE; use super::*; use crate::device::{VirtioInterrupt, VirtioInterruptType}; diff --git a/virtio-devices/src/vsock/packet.rs b/virtio-devices/src/vsock/packet.rs index 01ee18fbb1..a6749381df 100644 --- a/virtio-devices/src/vsock/packet.rs +++ b/virtio-devices/src/vsock/packet.rs @@ -24,7 +24,7 @@ use virtio_queue::DescriptorChain; use vm_memory::{Address, GuestMemory}; use vm_virtio::{AccessPlatform, Translatable}; -use super::{defs, Result, VsockError}; +use super::{Result, VsockError, defs}; use crate::get_host_address_range; // The vsock packet header is defined by the C struct: @@ -427,8 +427,8 @@ mod tests { use super::super::tests::TestContext; use super::*; - use crate::vsock::defs::MAX_PKT_BUF_SIZE; use crate::GuestMemoryMmap; + use crate::vsock::defs::MAX_PKT_BUF_SIZE; macro_rules! create_context { ($test_ctx:ident, $handler_ctx:ident) => { diff --git a/virtio-devices/src/vsock/unix/mod.rs b/virtio-devices/src/vsock/unix/mod.rs index 13c9883194..bb44698121 100644 --- a/virtio-devices/src/vsock/unix/mod.rs +++ b/virtio-devices/src/vsock/unix/mod.rs @@ -13,9 +13,9 @@ mod muxer; mod muxer_killq; mod muxer_rxq; +pub use Error as VsockUnixError; pub use muxer::VsockMuxer as VsockUnixBackend; use thiserror::Error; -pub use Error as VsockUnixError; mod defs { /// Maximum number of established connections that we can handle. diff --git a/virtio-devices/src/vsock/unix/muxer.rs b/virtio-devices/src/vsock/unix/muxer.rs index ebe0dc723b..58df496e20 100644 --- a/virtio-devices/src/vsock/unix/muxer.rs +++ b/virtio-devices/src/vsock/unix/muxer.rs @@ -52,7 +52,7 @@ use super::super::{ }; use super::muxer_killq::MuxerKillQ; use super::muxer_rxq::MuxerRxQ; -use super::{defs, Error, MuxerConnection, Result}; +use super::{Error, MuxerConnection, Result, defs}; /// A unique identifier of a `MuxerConnection` object. Connections are stored in a hash map, /// keyed by a `ConnMapKey` object. 
diff --git a/virtio-devices/src/vsock/unix/muxer_killq.rs b/virtio-devices/src/vsock/unix/muxer_killq.rs index 925f4d9383..5295ab69e1 100644 --- a/virtio-devices/src/vsock/unix/muxer_killq.rs +++ b/virtio-devices/src/vsock/unix/muxer_killq.rs @@ -29,7 +29,7 @@ use std::collections::{HashMap, VecDeque}; use std::time::Instant; use super::muxer::ConnMapKey; -use super::{defs, MuxerConnection}; +use super::{MuxerConnection, defs}; /// A kill queue item, holding the connection key and the scheduled time for termination. /// diff --git a/virtio-devices/src/vsock/unix/muxer_rxq.rs b/virtio-devices/src/vsock/unix/muxer_rxq.rs index 701db3459f..077cbc8899 100644 --- a/virtio-devices/src/vsock/unix/muxer_rxq.rs +++ b/virtio-devices/src/vsock/unix/muxer_rxq.rs @@ -20,7 +20,7 @@ use std::collections::{HashMap, VecDeque}; use super::super::VsockChannel; use super::muxer::{ConnMapKey, MuxerRx}; -use super::{defs, MuxerConnection}; +use super::{MuxerConnection, defs}; /// The muxer RX queue. /// diff --git a/virtio-devices/src/watchdog.rs b/virtio-devices/src/watchdog.rs index fdfc977d96..23a33a3e57 100644 --- a/virtio-devices/src/watchdog.rs +++ b/virtio-devices/src/watchdog.rs @@ -23,9 +23,9 @@ use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottabl use vmm_sys_util::eventfd::EventFd; use super::{ - ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, - Error as DeviceError, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, - VIRTIO_F_VERSION_1, + ActivateError, ActivateResult, EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, + EpollHelperHandler, Error as DeviceError, VIRTIO_F_VERSION_1, VirtioCommon, VirtioDevice, + VirtioDeviceType, }; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; diff --git a/vm-allocator/src/page_size.rs b/vm-allocator/src/page_size.rs index 96ae01edf7..6dedb6847a 100644 --- a/vm-allocator/src/page_size.rs +++ b/vm-allocator/src/page_size.rs @@ -1,7 +1,7 @@ // Copyright 2023 Arm Limited (or its affiliates). All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 -use libc::{sysconf, _SC_PAGESIZE}; +use libc::{_SC_PAGESIZE, sysconf}; /// get host page size pub fn get_page_size() -> u64 { diff --git a/vmm/src/acpi.rs b/vmm/src/acpi.rs index 215deac912..de2f581294 100644 --- a/vmm/src/acpi.rs +++ b/vmm/src/acpi.rs @@ -5,16 +5,16 @@ use std::sync::{Arc, Mutex}; use std::time::Instant; +use acpi_tables::Aml; use acpi_tables::rsdp::Rsdp; #[cfg(target_arch = "aarch64")] use acpi_tables::sdt::GenericAddress; use acpi_tables::sdt::Sdt; -use acpi_tables::Aml; -#[cfg(target_arch = "aarch64")] -use arch::aarch64::DeviceInfoForFdt; #[cfg(target_arch = "aarch64")] use arch::DeviceType; use arch::NumaNodes; +#[cfg(target_arch = "aarch64")] +use arch::aarch64::DeviceInfoForFdt; use bitflags::bitflags; use pci::PciBdf; use tracer::trace_scoped; diff --git a/vmm/src/api/dbus/mod.rs b/vmm/src/api/dbus/mod.rs index 85bd9d54fa..f329b86bcd 100644 --- a/vmm/src/api/dbus/mod.rs +++ b/vmm/src/api/dbus/mod.rs @@ -3,14 +3,14 @@ // SPDX-License-Identifier: Apache-2.0 // use std::panic::AssertUnwindSafe; -use std::sync::mpsc::Sender; use std::sync::Arc; +use std::sync::mpsc::Sender; use std::thread; use futures::channel::oneshot; -use futures::{executor, FutureExt}; +use futures::{FutureExt, executor}; use hypervisor::HypervisorType; -use seccompiler::{apply_filter, SeccompAction}; +use seccompiler::{SeccompAction, apply_filter}; use vmm_sys_util::eventfd::EventFd; use zbus::connection::Builder; use zbus::fdo::{self, Result}; @@ -26,7 +26,7 @@ use crate::api::{ VmReceiveMigration, VmRemoveDevice, VmResize, VmResizeZone, VmRestore, VmResume, VmSendMigration, VmShutdown, VmSnapshot, VmmPing, VmmShutdown, }; -use crate::seccomp_filters::{get_seccomp_filter, Thread}; +use crate::seccomp_filters::{Thread, get_seccomp_filter}; use crate::{Error as VmmError, NetConfig, Result as VmmResult, VmConfig}; pub type DBusApiShutdownChannels = (oneshot::Sender<()>, oneshot::Receiver<()>); diff --git a/vmm/src/api/http/http_endpoint.rs b/vmm/src/api/http/http_endpoint.rs index ff1de56296..5a4bf93da6 100644 --- a/vmm/src/api/http/http_endpoint.rs +++ b/vmm/src/api/http/http_endpoint.rs @@ -11,9 +11,9 @@ use std::sync::mpsc::Sender; use micro_http::{Body, Method, Request, Response, StatusCode, Version}; use vmm_sys_util::eventfd::EventFd; -use crate::api::http::{error_response, EndpointHandler, HttpError}; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use crate::api::VmCoredump; +use crate::api::http::{EndpointHandler, HttpError, error_response}; use crate::api::{ AddDisk, ApiAction, ApiError, ApiRequest, NetConfig, VmAddDevice, VmAddFs, VmAddNet, VmAddPmem, VmAddUserDevice, VmAddVdpa, VmAddVsock, VmBoot, VmConfig, VmCounters, VmDelete, VmNmi, VmPause, diff --git a/vmm/src/api/http/mod.rs b/vmm/src/api/http/mod.rs index 418a4d4961..243429154d 100644 --- a/vmm/src/api/http/mod.rs +++ b/vmm/src/api/http/mod.rs @@ -10,15 +10,15 @@ use std::os::unix::io::{IntoRawFd, RawFd}; use std::os::unix::net::UnixListener; use std::panic::AssertUnwindSafe; use std::path::PathBuf; -use std::sync::mpsc::Sender; use std::sync::LazyLock; +use std::sync::mpsc::Sender; use std::thread; use hypervisor::HypervisorType; use micro_http::{ Body, HttpServer, MediaType, Method, Request, Response, ServerError, StatusCode, Version, }; -use seccompiler::{apply_filter, SeccompAction}; +use seccompiler::{SeccompAction, apply_filter}; use serde_json::Error as SerdeError; use thiserror::Error; use vmm_sys_util::eventfd::EventFd; @@ -33,7 +33,7 @@ use crate::api::{ 
VmSendMigration, VmShutdown, VmSnapshot, }; use crate::landlock::Landlock; -use crate::seccomp_filters::{get_seccomp_filter, Thread}; +use crate::seccomp_filters::{Thread, get_seccomp_filter}; use crate::{Error as VmmError, Result}; pub mod http_endpoint; diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index a856f49f55..5ef63ffa39 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -34,7 +34,7 @@ pub mod dbus; pub mod http; use std::io; -use std::sync::mpsc::{channel, RecvError, SendError, Sender}; +use std::sync::mpsc::{RecvError, SendError, Sender, channel}; use micro_http::Body; use serde::{Deserialize, Serialize}; @@ -45,6 +45,7 @@ use vmm_sys_util::eventfd::EventFd; #[cfg(feature = "dbus_api")] pub use self::dbus::start_dbus_thread; pub use self::http::{start_http_fd_thread, start_http_path_thread}; +use crate::Error as VmmError; use crate::config::RestoreConfig; use crate::device_tree::DeviceTree; use crate::vm::{Error as VmError, VmState}; @@ -52,7 +53,6 @@ use crate::vm_config::{ DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, }; -use crate::Error as VmmError; /// API errors are sent back from the VMM API server through the ApiResponse. #[derive(Error, Debug)] diff --git a/vmm/src/clone3.rs b/vmm/src/clone3.rs index fc273680f8..eca0d90632 100644 --- a/vmm/src/clone3.rs +++ b/vmm/src/clone3.rs @@ -1,7 +1,7 @@ // Copyright 2021 Alyssa Ross // SPDX-License-Identifier: Apache-2.0 -use libc::{c_long, size_t, syscall, SYS_clone3}; +use libc::{SYS_clone3, c_long, size_t, syscall}; pub const CLONE_CLEAR_SIGHAND: u64 = 0x100000000; diff --git a/vmm/src/config.rs b/vmm/src/config.rs index b66c71b8c6..d213b3a2d8 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -270,7 +270,9 @@ pub enum ValidationError { #[error("Invalid PCI segment aperture weight: {0}")] InvalidPciSegmentApertureWeight(u32), /// Invalid IOMMU address width in bits - #[error("IOMMU address width in bits ({0}) should be less than or equal to {MAX_IOMMU_ADDRESS_WIDTH_BITS}")] + #[error( + "IOMMU address width in bits ({0}) should be less than or equal to {MAX_IOMMU_ADDRESS_WIDTH_BITS}" + )] InvalidIommuAddressWidthBits(u8), /// Balloon too big #[error("Ballon size ({0}) greater than RAM ({1})")] @@ -1536,8 +1538,7 @@ impl RngConfig { } impl BalloonConfig { - pub const SYNTAX: &'static str = - "Balloon parameters \"size=,deflate_on_oom=on|off,\ + pub const SYNTAX: &'static str = "Balloon parameters \"size=,deflate_on_oom=on|off,\ free_page_reporting=on|off\""; pub fn parse(balloon: &str) -> Result { @@ -1911,8 +1912,7 @@ impl DebugConsoleConfig { } impl DeviceConfig { - pub const SYNTAX: &'static str = - "Direct device assignment parameters \"path=,iommu=on|off,id=,pci_segment=\""; + pub const SYNTAX: &'static str = "Direct device assignment parameters \"path=,iommu=on|off,id=,pci_segment=\""; pub fn parse(device: &str) -> Result { let mut parser = OptionParser::new(); @@ -2206,7 +2206,9 @@ where S: serde::Serializer, { if let Some(x) = x { - warn!("'RestoredNetConfig' contains FDs that can't be serialized correctly. Serializing them as invalid FDs."); + warn!( + "'RestoredNetConfig' contains FDs that can't be serialized correctly. Serializing them as invalid FDs." + ); let invalid_fds = vec![-1; x.len()]; s.serialize_some(&invalid_fds) } else { @@ -2222,7 +2224,9 @@ where { let invalid_fds: Option> = Option::deserialize(d)?; if let Some(invalid_fds) = invalid_fds { - warn!("'RestoredNetConfig' contains FDs that can't be deserialized correctly. 
Deserializing them as invalid FDs."); + warn!( + "'RestoredNetConfig' contains FDs that can't be deserialized correctly. Deserializing them as invalid FDs." + ); Ok(Some(vec![-1; invalid_fds.len()])) } else { Ok(None) @@ -3556,7 +3560,9 @@ mod tests { ); assert_eq!( - NetConfig::parse("mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,num_queues=4,queue_size=1024,iommu=on")?, + NetConfig::parse( + "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,num_queues=4,queue_size=1024,iommu=on" + )?, NetConfig { num_queues: 4, queue_size: 1024, @@ -4814,18 +4820,19 @@ mod tests { )?, FwCfgConfig { items: Some(FwCfgItemList { - item_list: vec![FwCfgItem { - name: "opt/org.test/fw_cfg_test_item".to_string(), - file: PathBuf::from("/tmp/fw_cfg_test_item"), - }, - FwCfgItem { - name: "opt/org.test/fw_cfg_test_item2".to_string(), - file: PathBuf::from("/tmp/fw_cfg_test_item2"), - }] + item_list: vec![ + FwCfgItem { + name: "opt/org.test/fw_cfg_test_item".to_string(), + file: PathBuf::from("/tmp/fw_cfg_test_item"), + }, + FwCfgItem { + name: "opt/org.test/fw_cfg_test_item2".to_string(), + file: PathBuf::from("/tmp/fw_cfg_test_item2"), + } + ] }), ..Default::default() - }, - + }, ); Ok(()) } diff --git a/vmm/src/console_devices.rs b/vmm/src/console_devices.rs index c4137733bc..9f8d18ae7c 100644 --- a/vmm/src/console_devices.rs +++ b/vmm/src/console_devices.rs @@ -10,7 +10,7 @@ // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause // -use std::fs::{read_link, File, OpenOptions}; +use std::fs::{File, OpenOptions, read_link}; use std::mem::zeroed; use std::os::fd::{AsRawFd, FromRawFd, RawFd}; use std::os::unix::fs::OpenOptionsExt; @@ -19,12 +19,12 @@ use std::path::PathBuf; use std::sync::{Arc, Mutex}; use std::{io, result}; -use libc::{cfmakeraw, isatty, tcgetattr, tcsetattr, termios, TCSANOW}; +use libc::{TCSANOW, cfmakeraw, isatty, tcgetattr, tcsetattr, termios}; use thiserror::Error; +use crate::Vmm; use crate::sigwinch_listener::listen_for_sigwinch_on_tty; use crate::vm_config::ConsoleOutputMode; -use crate::Vmm; const TIOCSPTLCK: libc::c_int = 0x4004_5431; const TIOCGPTPEER: libc::c_int = 0x5441; @@ -225,7 +225,7 @@ pub(crate) fn pre_create_console_devices(vmm: &mut Vmm) -> ConsoleDeviceResult { - return Err(ConsoleDeviceError::NoSocketOptionSupportForConsoleDevice) + return Err(ConsoleDeviceError::NoSocketOptionSupportForConsoleDevice); } ConsoleOutputMode::Null => ConsoleOutput::Null, ConsoleOutputMode::Off => ConsoleOutput::Off, @@ -288,7 +288,7 @@ pub(crate) fn pre_create_console_devices(vmm: &mut Vmm) -> ConsoleDeviceResult { - return Err(ConsoleDeviceError::NoSocketOptionSupportForConsoleDevice) + return Err(ConsoleDeviceError::NoSocketOptionSupportForConsoleDevice); } ConsoleOutputMode::Null => ConsoleOutput::Null, ConsoleOutputMode::Off => ConsoleOutput::Off, diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index 00d2468f39..5ae2f6da26 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -23,7 +23,7 @@ use std::{cmp, io, result, thread}; #[cfg(not(target_arch = "riscv64"))] use acpi_tables::sdt::Sdt; -use acpi_tables::{aml, Aml}; +use acpi_tables::{Aml, aml}; use anyhow::anyhow; #[cfg(target_arch = "x86_64")] use arch::x86_64::get_x2apic_id; @@ -34,30 +34,30 @@ use devices::interrupt_controller::InterruptController; #[cfg(all(target_arch = "aarch64", feature = "guest_debug"))] use gdbstub_arch::aarch64::reg::AArch64CoreRegs as CoreRegs; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] -use gdbstub_arch::x86::reg::{X86SegmentRegs, X86_64CoreRegs as CoreRegs}; +use 
gdbstub_arch::x86::reg::{X86_64CoreRegs as CoreRegs, X86SegmentRegs}; +#[cfg(target_arch = "x86_64")] +use hypervisor::CpuVendor; +#[cfg(feature = "kvm")] +use hypervisor::HypervisorType; +#[cfg(feature = "guest_debug")] +use hypervisor::StandardRegisters; #[cfg(all(target_arch = "aarch64", feature = "guest_debug"))] use hypervisor::arch::aarch64::regs::{ID_AA64MMFR0_EL1, TCR_EL1, TTBR1_EL1}; -#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] -use hypervisor::arch::x86::msr_index; #[cfg(target_arch = "x86_64")] use hypervisor::arch::x86::CpuIdEntry; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use hypervisor::arch::x86::MsrEntry; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use hypervisor::arch::x86::SpecialRegisters; +#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] +use hypervisor::arch::x86::msr_index; #[cfg(feature = "tdx")] use hypervisor::kvm::{TdxExitDetails, TdxExitStatus}; -#[cfg(target_arch = "x86_64")] -use hypervisor::CpuVendor; -#[cfg(feature = "kvm")] -use hypervisor::HypervisorType; -#[cfg(feature = "guest_debug")] -use hypervisor::StandardRegisters; use hypervisor::{CpuState, HypervisorCpuError, VmExit, VmOps}; use libc::{c_void, siginfo_t}; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use linux_loader::elf::Elf64_Nhdr; -use seccompiler::{apply_filter, SeccompAction}; +use seccompiler::{SeccompAction, apply_filter}; use thiserror::Error; use tracer::trace_scoped; use vm_device::BusDevice; @@ -67,26 +67,26 @@ use vm_memory::ByteValued; use vm_memory::{Bytes, GuestAddressSpace}; use vm_memory::{GuestAddress, GuestMemoryAtomic}; use vm_migration::{ - snapshot_from_id, Migratable, MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, - Transportable, + Migratable, MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable, + snapshot_from_id, }; use vmm_sys_util::eventfd::EventFd; -use vmm_sys_util::signal::{register_signal_handler, SIGRTMIN}; +use vmm_sys_util::signal::{SIGRTMIN, register_signal_handler}; use zerocopy::{FromBytes, Immutable, IntoBytes}; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use crate::coredump::{ - CpuElf64Writable, CpuSegment, CpuState as DumpCpusState, DumpState, Elf64Writable, - GuestDebuggableError, NoteDescType, X86_64ElfPrStatus, X86_64UserRegs, COREDUMP_NAME_SIZE, - NT_PRSTATUS, + COREDUMP_NAME_SIZE, CpuElf64Writable, CpuSegment, CpuState as DumpCpusState, DumpState, + Elf64Writable, GuestDebuggableError, NT_PRSTATUS, NoteDescType, X86_64ElfPrStatus, + X86_64UserRegs, }; #[cfg(feature = "guest_debug")] -use crate::gdb::{get_raw_tid, Debuggable, DebuggableError}; -use crate::seccomp_filters::{get_seccomp_filter, Thread}; +use crate::gdb::{Debuggable, DebuggableError, get_raw_tid}; +use crate::seccomp_filters::{Thread, get_seccomp_filter}; #[cfg(target_arch = "x86_64")] use crate::vm::physical_bits; use crate::vm_config::CpusConfig; -use crate::{GuestMemoryMmap, CPU_MANAGER_SNAPSHOT_ID}; +use crate::{CPU_MANAGER_SNAPSHOT_ID, GuestMemoryMmap}; #[cfg(all(target_arch = "aarch64", feature = "guest_debug"))] /// Extract the specified bits of a 64-bit integer. 
@@ -1835,7 +1835,7 @@ impl CpuManager { _ => { return Err(Error::TranslateVirtualAddress(anyhow!(format!( "PA range not supported {pa_range}" - )))) + )))); } }; @@ -2858,8 +2858,8 @@ mod tests { use arch::layout::{BOOT_STACK_POINTER, ZERO_PAGE_START}; use arch::x86_64::interrupts::*; use arch::x86_64::regs::*; - use hypervisor::arch::x86::{FpuState, LapicState}; use hypervisor::StandardRegisters; + use hypervisor::arch::x86::{FpuState, LapicState}; use linux_loader::loader::bootparam::setup_header; #[test] @@ -2912,7 +2912,7 @@ mod tests { #[test] fn test_setup_msrs() { - use hypervisor::arch::x86::{msr_index, MsrEntry}; + use hypervisor::arch::x86::{MsrEntry, msr_index}; let hv = hypervisor::new().unwrap(); let vm = hv.create_vm().expect("new VM fd creation failed"); @@ -2997,6 +2997,7 @@ mod tests { use std::{mem, mem::offset_of}; use arch::layout; + use hypervisor::HypervisorCpuError; use hypervisor::arch::aarch64::regs::MPIDR_EL1; #[cfg(feature = "kvm")] use hypervisor::arm64_core_reg_id; @@ -3004,9 +3005,8 @@ mod tests { use hypervisor::kvm::aarch64::is_system_register; #[cfg(feature = "kvm")] use hypervisor::kvm::kvm_bindings::{ - user_pt_regs, KVM_REG_ARM64, KVM_REG_ARM64_SYSREG, KVM_REG_ARM_CORE, KVM_REG_SIZE_U64, + KVM_REG_ARM_CORE, KVM_REG_ARM64, KVM_REG_ARM64_SYSREG, KVM_REG_SIZE_U64, user_pt_regs, }; - use hypervisor::HypervisorCpuError; #[test] fn test_setup_regs() { diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 256047e877..3bc814fd77 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -11,7 +11,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::fs::{File, OpenOptions}; -use std::io::{self, stdout, IsTerminal, Seek, SeekFrom}; +use std::io::{self, IsTerminal, Seek, SeekFrom, stdout}; use std::num::Wrapping; use std::os::unix::fs::OpenOptionsExt; use std::os::unix::io::{AsRawFd, FromRawFd}; @@ -23,14 +23,14 @@ use std::time::Instant; use acpi_tables::sdt::GenericAddress; #[cfg(not(target_arch = "riscv64"))] -use acpi_tables::{aml, Aml}; +use acpi_tables::{Aml, aml}; #[cfg(not(target_arch = "riscv64"))] use anyhow::anyhow; #[cfg(target_arch = "x86_64")] use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; -use arch::{layout, NumaNodes}; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use arch::{DeviceType, MmioDeviceInfo}; +use arch::{NumaNodes, layout}; use block::async_io::DiskFile; use block::fixed_vhd_sync::FixedVhdDiskSync; use block::qcow_sync::QcowDiskSync; @@ -38,7 +38,7 @@ use block::raw_async_aio::RawFileDiskAio; use block::raw_sync::RawFileDiskSync; use block::vhdx_sync::VhdxDiskSync; use block::{ - block_aio_is_supported, block_io_uring_is_supported, detect_image_type, qcow, vhdx, ImageType, + ImageType, block_aio_is_supported, block_io_uring_is_supported, detect_image_type, qcow, vhdx, }; #[cfg(feature = "io_uring")] use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; @@ -55,26 +55,26 @@ use devices::interrupt_controller::InterruptController; use devices::ioapic; #[cfg(feature = "ivshmem")] use devices::ivshmem::{IvshmemError, IvshmemOps}; -#[cfg(all(feature = "fw_cfg", target_arch = "x86_64"))] -use devices::legacy::fw_cfg::FW_CFG_ACPI_ID; #[cfg(target_arch = "aarch64")] use devices::legacy::Pl011; #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))] use devices::legacy::Serial; +#[cfg(all(feature = "fw_cfg", target_arch = "x86_64"))] +use devices::legacy::fw_cfg::FW_CFG_ACPI_ID; #[cfg(feature = "fw_cfg")] use devices::legacy::{ - 
fw_cfg::{PORT_FW_CFG_BASE, PORT_FW_CFG_WIDTH}, FwCfg, + fw_cfg::{PORT_FW_CFG_BASE, PORT_FW_CFG_WIDTH}, }; #[cfg(feature = "pvmemcontrol")] use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice}; -use devices::{interrupt_controller, AcpiNotificationFlags}; +use devices::{AcpiNotificationFlags, interrupt_controller}; +use hypervisor::IoEventAddress; #[cfg(target_arch = "aarch64")] use hypervisor::arch::aarch64::regs::AARCH64_PMU_IRQ; -use hypervisor::IoEventAddress; use libc::{ - tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, - TCSANOW, + MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, tcsetattr, + termios, }; use pci::{ DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping, @@ -104,27 +104,27 @@ use vm_memory::{Address, GuestAddress, GuestMemoryRegion, GuestUsize, MmapRegion use vm_memory::{GuestAddressSpace, GuestMemory}; use vm_migration::protocol::MemoryRangeTable; use vm_migration::{ - snapshot_from_id, state_from_id, Migratable, MigratableError, Pausable, Snapshot, SnapshotData, - Snapshottable, Transportable, + Migratable, MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable, + snapshot_from_id, state_from_id, }; use vm_virtio::{AccessPlatform, VirtioDeviceType}; use vmm_sys_util::eventfd::EventFd; use crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput}; -use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; +use crate::cpu::{CPU_MANAGER_ACPI_SIZE, CpuManager}; use crate::device_tree::{DeviceNode, DeviceTree}; use crate::interrupt::{LegacyUserspaceInterruptManager, MsiInterruptManager}; -use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; +use crate::memory_manager::{Error as MemoryManagerError, MEMORY_MANAGER_ACPI_SIZE, MemoryManager}; use crate::pci_segment::PciSegment; use crate::serial_manager::{Error as SerialManagerError, SerialManager}; #[cfg(feature = "ivshmem")] use crate::vm_config::IvshmemConfig; use crate::vm_config::{ - ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, - VdpaConfig, VhostMode, VmConfig, VsockConfig, DEFAULT_IOMMU_ADDRESS_WIDTH_BITS, - DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT, + ConsoleOutputMode, DEFAULT_IOMMU_ADDRESS_WIDTH_BITS, DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT, + DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig, + VhostMode, VmConfig, VsockConfig, }; -use crate::{device_node, GuestRegionMmap, PciDeviceInfo, DEVICE_MANAGER_SNAPSHOT_ID}; +use crate::{DEVICE_MANAGER_SNAPSHOT_ID, GuestRegionMmap, PciDeviceInfo, device_node}; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] const MMIO_LEN: u64 = 0x1000; diff --git a/vmm/src/gdb.rs b/vmm/src/gdb.rs index 16c9f64d98..ef4f4de8fd 100644 --- a/vmm/src/gdb.rs +++ b/vmm/src/gdb.rs @@ -11,24 +11,24 @@ use std::sync::mpsc; use gdbstub::arch::Arch; use gdbstub::common::{Signal, Tid}; use gdbstub::conn::{Connection, ConnectionExt}; -use gdbstub::stub::{run_blocking, DisconnectReason, MultiThreadStopReason}; +use gdbstub::stub::{DisconnectReason, MultiThreadStopReason, run_blocking}; +use gdbstub::target::ext::base::BaseOps; use gdbstub::target::ext::base::multithread::{ MultiThreadBase, MultiThreadResume, MultiThreadResumeOps, MultiThreadSingleStep, MultiThreadSingleStepOps, }; -use gdbstub::target::ext::base::BaseOps; use gdbstub::target::ext::breakpoints::{ Breakpoints, BreakpointsOps, HwBreakpoint, 
HwBreakpointOps, }; use gdbstub::target::{Target, TargetError, TargetResult}; #[cfg(target_arch = "aarch64")] -use gdbstub_arch::aarch64::reg::AArch64CoreRegs as CoreRegs; -#[cfg(target_arch = "aarch64")] use gdbstub_arch::aarch64::AArch64 as GdbArch; -#[cfg(target_arch = "x86_64")] -use gdbstub_arch::x86::reg::X86_64CoreRegs as CoreRegs; +#[cfg(target_arch = "aarch64")] +use gdbstub_arch::aarch64::reg::AArch64CoreRegs as CoreRegs; #[cfg(target_arch = "x86_64")] use gdbstub_arch::x86::X86_64_SSE as GdbArch; +#[cfg(target_arch = "x86_64")] +use gdbstub_arch::x86::reg::X86_64CoreRegs as CoreRegs; use thiserror::Error; use vm_memory::{GuestAddress, GuestMemoryAtomic, GuestMemoryError}; diff --git a/vmm/src/igvm/igvm_loader.rs b/vmm/src/igvm/igvm_loader.rs index 805bb94b41..97297a0920 100644 --- a/vmm/src/igvm/igvm_loader.rs +++ b/vmm/src/igvm/igvm_loader.rs @@ -10,21 +10,21 @@ use std::sync::{Arc, Mutex}; use igvm::snp_defs::SevVmsa; use igvm::{IgvmDirectiveHeader, IgvmFile, IgvmPlatformHeader, IsolationType}; +#[cfg(feature = "sev_snp")] +use igvm_defs::{IGVM_VHS_MEMORY_MAP_ENTRY, MemoryMapEntryType}; use igvm_defs::{ - IgvmPageDataType, IgvmPlatformType, IGVM_VHS_PARAMETER, IGVM_VHS_PARAMETER_INSERT, + IGVM_VHS_PARAMETER, IGVM_VHS_PARAMETER_INSERT, IgvmPageDataType, IgvmPlatformType, }; -#[cfg(feature = "sev_snp")] -use igvm_defs::{MemoryMapEntryType, IGVM_VHS_MEMORY_MAP_ENTRY}; use mshv_bindings::*; use thiserror::Error; use zerocopy::IntoBytes; +#[cfg(feature = "sev_snp")] +use crate::GuestMemoryMmap; use crate::cpu::CpuManager; use crate::igvm::loader::Loader; -use crate::igvm::{BootPageAcceptance, IgvmLoadedInfo, StartupMemoryType, HV_PAGE_SIZE}; +use crate::igvm::{BootPageAcceptance, HV_PAGE_SIZE, IgvmLoadedInfo, StartupMemoryType}; use crate::memory_manager::MemoryManager; -#[cfg(feature = "sev_snp")] -use crate::GuestMemoryMmap; #[derive(Debug, Error)] pub enum Error { diff --git a/vmm/src/igvm/loader.rs b/vmm/src/igvm/loader.rs index 215c84c50e..316cadb1ac 100644 --- a/vmm/src/igvm/loader.rs +++ b/vmm/src/igvm/loader.rs @@ -10,7 +10,7 @@ use vm_memory::{ GuestMemoryRegion, }; -use crate::igvm::{BootPageAcceptance, StartupMemoryType, HV_PAGE_SIZE}; +use crate::igvm::{BootPageAcceptance, HV_PAGE_SIZE, StartupMemoryType}; /// Structure to hold the guest memory info/layout to check /// the if the memory is accepted within the layout. 
diff --git a/vmm/src/landlock.rs b/vmm/src/landlock.rs index b7fbf6edb7..e7efd9cbcc 100644 --- a/vmm/src/landlock.rs +++ b/vmm/src/landlock.rs @@ -9,8 +9,8 @@ use std::path::Path; #[cfg(test)] use landlock::make_bitflags; use landlock::{ - path_beneath_rules, Access, AccessFs, BitFlags, Compatible, Ruleset, RulesetAttr, - RulesetCreated, RulesetCreatedAttr, RulesetError, ABI, + ABI, Access, AccessFs, BitFlags, Compatible, Ruleset, RulesetAttr, RulesetCreated, + RulesetCreatedAttr, RulesetError, path_beneath_rules, }; use thiserror::Error; @@ -59,7 +59,7 @@ impl TryFrom<&str> for LandlockAccess { _ => { return Err(LandlockError::InvalidLandlockAccess( format!("Invalid access: {c}").to_string(), - )) + )); } }; } diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index cb430728b8..ef23793088 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -10,7 +10,7 @@ extern crate log; use std::collections::HashMap; use std::fs::File; -use std::io::{stdout, Read, Write}; +use std::io::{Read, Write, stdout}; use std::net::{TcpListener, TcpStream}; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::os::unix::net::{UnixListener, UnixStream}; @@ -29,12 +29,12 @@ use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; use api::http::HttpApiHandle; #[cfg(all(feature = "kvm", target_arch = "x86_64"))] use arch::x86_64::MAX_SUPPORTED_CPUS_LEGACY; -use console_devices::{pre_create_console_devices, ConsoleInfo}; +use console_devices::{ConsoleInfo, pre_create_console_devices}; use landlock::LandlockError; -use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW}; +use libc::{EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW, tcsetattr, termios}; use memory_manager::MemoryManagerSnapshotData; use pci::PciBdf; -use seccompiler::{apply_filter, SeccompAction}; +use seccompiler::{SeccompAction, apply_filter}; use serde::ser::{SerializeStruct, Serializer}; use serde::{Deserialize, Serialize}; use signal_hook::iterator::{Handle, Signals}; @@ -52,7 +52,7 @@ use crate::api::{ ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData, VmSendMigrationData, VmmPingResponse, }; -use crate::config::{add_to_config, RestoreConfig}; +use crate::config::{RestoreConfig, add_to_config}; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use crate::coredump::GuestDebuggable; use crate::landlock::Landlock; @@ -60,7 +60,7 @@ use crate::memory_manager::MemoryManager; #[cfg(all(feature = "kvm", target_arch = "x86_64"))] use crate::migration::get_vm_snapshot; use crate::migration::{recv_vm_config, recv_vm_state}; -use crate::seccomp_filters::{get_seccomp_filter, Thread}; +use crate::seccomp_filters::{Thread, get_seccomp_filter}; use crate::vm::{Error as VmError, Vm, VmState}; use crate::vm_config::{ DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig, @@ -2469,14 +2469,15 @@ mod unit_tests { )); let _ = vmm.vm_create(create_dummy_vm_config()); - assert!(vmm - .vm_config - .as_ref() - .unwrap() - .lock() - .unwrap() - .devices - .is_none()); + assert!( + vmm.vm_config + .as_ref() + .unwrap() + .lock() + .unwrap() + .devices + .is_none() + ); assert!(vmm.vm_add_device(device_config.clone()).unwrap().is_none()); assert_eq!( @@ -2516,19 +2517,21 @@ mod unit_tests { )); let _ = vmm.vm_create(create_dummy_vm_config()); - assert!(vmm - .vm_config - .as_ref() - .unwrap() - .lock() - .unwrap() - .user_devices - .is_none()); + assert!( + vmm.vm_config + .as_ref() + .unwrap() + .lock() + .unwrap() + .user_devices + .is_none() + ); - assert!(vmm - 
.vm_add_user_device(user_device_config.clone()) - .unwrap() - .is_none()); + assert!( + vmm.vm_add_user_device(user_device_config.clone()) + .unwrap() + .is_none() + ); assert_eq!( vmm.vm_config .as_ref() @@ -2565,14 +2568,15 @@ mod unit_tests { )); let _ = vmm.vm_create(create_dummy_vm_config()); - assert!(vmm - .vm_config - .as_ref() - .unwrap() - .lock() - .unwrap() - .disks - .is_none()); + assert!( + vmm.vm_config + .as_ref() + .unwrap() + .lock() + .unwrap() + .disks + .is_none() + ); assert!(vmm.vm_add_disk(disk_config.clone()).unwrap().is_none()); assert_eq!( @@ -2650,14 +2654,15 @@ mod unit_tests { )); let _ = vmm.vm_create(create_dummy_vm_config()); - assert!(vmm - .vm_config - .as_ref() - .unwrap() - .lock() - .unwrap() - .pmem - .is_none()); + assert!( + vmm.vm_config + .as_ref() + .unwrap() + .lock() + .unwrap() + .pmem + .is_none() + ); assert!(vmm.vm_add_pmem(pmem_config.clone()).unwrap().is_none()); assert_eq!( @@ -2699,14 +2704,15 @@ mod unit_tests { )); let _ = vmm.vm_create(create_dummy_vm_config()); - assert!(vmm - .vm_config - .as_ref() - .unwrap() - .lock() - .unwrap() - .net - .is_none()); + assert!( + vmm.vm_config + .as_ref() + .unwrap() + .lock() + .unwrap() + .net + .is_none() + ); assert!(vmm.vm_add_net(net_config.clone()).unwrap().is_none()); assert_eq!( @@ -2745,14 +2751,15 @@ mod unit_tests { )); let _ = vmm.vm_create(create_dummy_vm_config()); - assert!(vmm - .vm_config - .as_ref() - .unwrap() - .lock() - .unwrap() - .vdpa - .is_none()); + assert!( + vmm.vm_config + .as_ref() + .unwrap() + .lock() + .unwrap() + .vdpa + .is_none() + ); assert!(vmm.vm_add_vdpa(vdpa_config.clone()).unwrap().is_none()); assert_eq!( @@ -2791,14 +2798,15 @@ mod unit_tests { )); let _ = vmm.vm_create(create_dummy_vm_config()); - assert!(vmm - .vm_config - .as_ref() - .unwrap() - .lock() - .unwrap() - .vsock - .is_none()); + assert!( + vmm.vm_config + .as_ref() + .unwrap() + .lock() + .unwrap() + .vsock + .is_none() + ); assert!(vmm.vm_add_vsock(vsock_config.clone()).unwrap().is_none()); assert_eq!( diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index 461eb24b9a..15225e9dc7 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -17,7 +17,7 @@ use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Barrier, Mutex}; use std::{ffi, result, thread}; -use acpi_tables::{aml, Aml}; +use acpi_tables::{Aml, aml}; use anyhow::anyhow; use arch::RegionType; #[cfg(target_arch = "x86_64")] @@ -278,7 +278,9 @@ pub enum Error { /// It's invalid to try applying a NUMA policy to a memory zone that is /// memory mapped with MAP_SHARED. - #[error("Invalid to try applying a NUMA policy to a memory zone that is memory mapped with MAP_SHARED")] + #[error( + "Invalid to try applying a NUMA policy to a memory zone that is memory mapped with MAP_SHARED" + )] InvalidSharedMemoryZoneWithHostNuma, /// Failed applying NUMA memory policy. 
@@ -2611,7 +2613,7 @@ impl Migratable for MemoryManager { return Err(MigratableError::MigrateSend(anyhow!( "Error finding 'guest memory region' with address {:x}", r.gpa - ))) + ))); } }; diff --git a/vmm/src/pci_segment.rs b/vmm/src/pci_segment.rs index 010859e05f..345869c1da 100644 --- a/vmm/src/pci_segment.rs +++ b/vmm/src/pci_segment.rs @@ -11,11 +11,11 @@ use std::sync::{Arc, Mutex}; -use acpi_tables::{aml, Aml}; +use acpi_tables::{Aml, aml}; use arch::layout; use pci::{DeviceRelocation, PciBdf, PciBus, PciConfigMmio, PciRoot}; #[cfg(target_arch = "x86_64")] -use pci::{PciConfigIo, PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE}; +use pci::{PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE, PciConfigIo}; use uuid::Uuid; use vm_allocator::AddressAllocator; use vm_device::BusDeviceSync; @@ -105,7 +105,12 @@ impl PciSegment { info!( "Adding PCI segment: id={}, PCI MMIO config address: 0x{:x}, mem32 area [0x{:x}-0x{:x}, mem64 area [0x{:x}-0x{:x}", - segment.id, segment.mmio_config_address, segment.start_of_mem32_area, segment.end_of_mem32_area, segment.start_of_mem64_area, segment.end_of_mem64_area + segment.id, + segment.mmio_config_address, + segment.start_of_mem32_area, + segment.end_of_mem32_area, + segment.start_of_mem64_area, + segment.end_of_mem64_area ); Ok(segment) } diff --git a/vmm/src/sigwinch_listener.rs b/vmm/src/sigwinch_listener.rs index 104a680a63..b50e93c986 100644 --- a/vmm/src/sigwinch_listener.rs +++ b/vmm/src/sigwinch_listener.rs @@ -3,10 +3,10 @@ use std::cell::RefCell; use std::collections::BTreeSet; -use std::fs::{read_dir, File}; +use std::fs::{File, read_dir}; use std::io::{self, ErrorKind, Read, Write}; use std::iter::once; -use std::mem::{size_of, MaybeUninit}; +use std::mem::{MaybeUninit, size_of}; use std::os::unix::prelude::*; use std::process::exit; use std::ptr::null_mut; @@ -14,15 +14,16 @@ use std::ptr::null_mut; use arch::_NSIG; use hypervisor::HypervisorType; use libc::{ - c_int, c_void, close, fork, getpgrp, ioctl, pipe2, poll, pollfd, setsid, sigemptyset, - siginfo_t, signal, sigprocmask, syscall, tcgetpgrp, tcsetpgrp, SYS_close_range, EINVAL, ENOSYS, - ENOTTY, O_CLOEXEC, POLLERR, SIGCHLD, SIGWINCH, SIG_DFL, SIG_SETMASK, STDERR_FILENO, TIOCSCTTY, + EINVAL, ENOSYS, ENOTTY, O_CLOEXEC, POLLERR, SIG_DFL, SIG_SETMASK, SIGCHLD, SIGWINCH, + STDERR_FILENO, SYS_close_range, TIOCSCTTY, c_int, c_void, close, fork, getpgrp, ioctl, pipe2, + poll, pollfd, setsid, sigemptyset, siginfo_t, signal, sigprocmask, syscall, tcgetpgrp, + tcsetpgrp, }; -use seccompiler::{apply_filter, BpfProgram, SeccompAction}; +use seccompiler::{BpfProgram, SeccompAction, apply_filter}; use vmm_sys_util::signal::register_signal_handler; -use crate::clone3::{clone3, clone_args, CLONE_CLEAR_SIGHAND}; -use crate::seccomp_filters::{get_seccomp_filter, Thread}; +use crate::clone3::{CLONE_CLEAR_SIGHAND, clone_args, clone3}; +use crate::seccomp_filters::{Thread, get_seccomp_filter}; thread_local! 
{ // The tty file descriptor is stored in a global variable so it diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index d5e1e808d6..0e85b1ddc1 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -25,20 +25,20 @@ use std::time::Instant; use std::{cmp, result, str, thread}; use anyhow::anyhow; +#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] +use arch::PciSpaceInfo; #[cfg(target_arch = "x86_64")] use arch::layout::{KVM_IDENTITY_MAP_START, KVM_TSS_START}; -#[cfg(feature = "tdx")] -use arch::x86_64::tdx::TdvfSection; #[cfg(all(feature = "kvm", target_arch = "x86_64"))] use arch::x86_64::MAX_SUPPORTED_CPUS_LEGACY; -#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] -use arch::PciSpaceInfo; -use arch::{get_host_cpu_phys_bits, EntryPoint, NumaNode, NumaNodes}; +#[cfg(feature = "tdx")] +use arch::x86_64::tdx::TdvfSection; +use arch::{EntryPoint, NumaNode, NumaNodes, get_host_cpu_phys_bits}; +use devices::AcpiNotificationFlags; #[cfg(target_arch = "aarch64")] use devices::interrupt_controller; #[cfg(feature = "fw_cfg")] use devices::legacy::fw_cfg::FwCfgItem; -use devices::AcpiNotificationFlags; #[cfg(all(target_arch = "aarch64", feature = "guest_debug"))] use gdbstub_arch::aarch64::reg::AArch64CoreRegs as CoreRegs; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] @@ -46,17 +46,17 @@ use gdbstub_arch::x86::reg::X86_64CoreRegs as CoreRegs; #[cfg(target_arch = "aarch64")] use hypervisor::arch::aarch64::regs::AARCH64_PMU_IRQ; use hypervisor::{HypervisorVmError, VmOps}; -use libc::{termios, SIGWINCH}; +use libc::{SIGWINCH, termios}; use linux_loader::cmdline::Cmdline; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use linux_loader::elf; +use linux_loader::loader::KernelLoader; #[cfg(target_arch = "x86_64")] use linux_loader::loader::bzimage::BzImage; #[cfg(target_arch = "x86_64")] use linux_loader::loader::elf::PvhBootCapability::PvhEntryPresent; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use linux_loader::loader::pe::Error::InvalidImageMagicNumber; -use linux_loader::loader::KernelLoader; use seccompiler::SeccompAction; use serde::{Deserialize, Serialize}; use thiserror::Error; @@ -69,12 +69,12 @@ use vm_memory::{ }; use vm_migration::protocol::{MemoryRangeTable, Request, Response}; use vm_migration::{ - snapshot_from_id, Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, + Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, snapshot_from_id, }; use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::sock_ctrl_msg::ScmSocket; -use crate::config::{add_to_config, ValidationError}; +use crate::config::{ValidationError, add_to_config}; use crate::console_devices::{ConsoleDeviceError, ConsoleInfo}; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use crate::coredump::{ @@ -94,7 +94,7 @@ use crate::memory_manager::{ use crate::migration::get_vm_snapshot; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use crate::migration::url_to_file; -use crate::migration::{url_to_path, SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE}; +use crate::migration::{SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE, url_to_path}; #[cfg(feature = "fw_cfg")] use crate::vm_config::FwCfgConfig; use crate::vm_config::{ @@ -102,8 +102,8 @@ use crate::vm_config::{ PmemConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, }; use crate::{ - cpu, GuestMemoryMmap, PciDeviceInfo, CPU_MANAGER_SNAPSHOT_ID, DEVICE_MANAGER_SNAPSHOT_ID, - MEMORY_MANAGER_SNAPSHOT_ID, + CPU_MANAGER_SNAPSHOT_ID, 
DEVICE_MANAGER_SNAPSHOT_ID, GuestMemoryMmap, + MEMORY_MANAGER_SNAPSHOT_ID, PciDeviceInfo, cpu, }; /// Errors associated with VM management @@ -1315,10 +1315,7 @@ impl Vm { return Self::load_igvm(igvm, memory_manager, cpu_manager); } } - match ( - &payload.firmware, - &payload.kernel, - ) { + match (&payload.firmware, &payload.kernel) { (Some(firmware), None) => { let firmware = File::open(firmware).map_err(Error::FirmwareFile)?; Self::load_kernel(firmware, None, memory_manager) @@ -1328,7 +1325,9 @@ impl Vm { let cmdline = Self::generate_cmdline(payload)?; Self::load_kernel(kernel, Some(cmdline), memory_manager) } - _ => unreachable!("Unsupported boot configuration: programming error from 'PayloadConfigError::validate()'"), + _ => unreachable!( + "Unsupported boot configuration: programming error from 'PayloadConfigError::validate()'" + ), } } @@ -1346,7 +1345,9 @@ impl Vm { let kernel = File::open(kernel).map_err(Error::KernelFile)?; Self::load_kernel(kernel, memory_manager) } - _ => unreachable!("Unsupported boot configuration: programming error from 'PayloadConfigError::validate()'"), + _ => unreachable!( + "Unsupported boot configuration: programming error from 'PayloadConfigError::validate()'" + ), } } diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index 96269bfb27..7a581307e7 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -13,8 +13,8 @@ use serde::{Deserialize, Serialize}; use thiserror::Error; use virtio_devices::RateLimiterConfig; -use crate::landlock::LandlockError; use crate::Landlock; +use crate::landlock::LandlockError; pub type LandlockResult = result::Result; @@ -353,12 +353,16 @@ pub fn default_netconfig_tap() -> Option { } pub fn default_netconfig_ip() -> IpAddr { - warn!("Deprecation warning: No IP address provided. A default IP address is assigned. This behavior will be deprecated soon."); + warn!( + "Deprecation warning: No IP address provided. A default IP address is assigned. This behavior will be deprecated soon." + ); IpAddr::V4(Ipv4Addr::new(192, 168, 249, 1)) } pub fn default_netconfig_mask() -> IpAddr { - warn!("Deprecation warning: No network mask provided. A default network mask is assigned. This behavior will be deprecated soon."); + warn!( + "Deprecation warning: No network mask provided. A default network mask is assigned. This behavior will be deprecated soon." + ); IpAddr::V4(Ipv4Addr::new(255, 255, 255, 0)) } @@ -383,7 +387,9 @@ where S: serde::Serializer, { if let Some(x) = x { - warn!("'NetConfig' contains FDs that can't be serialized correctly. Serializing them as invalid FDs."); + warn!( + "'NetConfig' contains FDs that can't be serialized correctly. Serializing them as invalid FDs." + ); let invalid_fds = vec![-1; x.len()]; s.serialize_some(&invalid_fds) } else { @@ -397,7 +403,9 @@ where { let invalid_fds: Option> = Option::deserialize(d)?; if let Some(invalid_fds) = invalid_fds { - warn!("'NetConfig' contains FDs that can't be deserialized correctly. Deserializing them as invalid FDs."); + warn!( + "'NetConfig' contains FDs that can't be deserialized correctly. Deserializing them as invalid FDs." + ); Ok(Some(vec![-1; invalid_fds.len()])) } else { Ok(None) From f73a6c8d8e0abf4400656b9c26ac28f832e3e92b Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 15 Aug 2025 08:58:55 +0200 Subject: [PATCH 171/294] build: treewide: clippy for edition 2024 This commit includes all simple clippy fixes excluding the collapsing of nested ifs using the let-chains feature. This follows in the next commit. 
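As an illustration only (not part of the patch series), the kind of "simple" fix this commit covers is clippy's `let_and_return` lint, visible in the tests/integration.rs hunk below. A minimal sketch with made-up function names:

// Before: bind a value, then immediately return the binding (clippy::let_and_return).
fn vmcore_path_before(dir: &std::path::Path) -> String {
    let vmcore_file = String::from(dir.join("vmcore").to_str().unwrap());
    vmcore_file
}

// After: return the expression directly.
fn vmcore_path_after(dir: &std::path::Path) -> String {
    String::from(dir.join("vmcore").to_str().unwrap())
}

fn main() {
    let dir = std::env::temp_dir();
    assert_eq!(vmcore_path_before(&dir), vmcore_path_after(&dir));
}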
Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- tests/integration.rs | 3 +-- vmm/src/config.rs | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration.rs b/tests/integration.rs index bdb258a22b..e18d7fb7b1 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -522,8 +522,7 @@ fn temp_snapshot_dir_path(tmp_dir: &TempDir) -> String { } fn temp_vmcore_file_path(tmp_dir: &TempDir) -> String { - let vmcore_file = String::from(tmp_dir.as_path().join("vmcore").to_str().unwrap()); - vmcore_file + String::from(tmp_dir.as_path().join("vmcore").to_str().unwrap()) } // Creates the path for direct kernel boot and return the path. diff --git a/vmm/src/config.rs b/vmm/src/config.rs index d213b3a2d8..1614dc0b51 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -636,6 +636,7 @@ impl CpusConfig { // list as it will always be checked for. #[allow(unused_mut)] let mut features = CpuFeatures::default(); + #[allow(clippy::never_loop)] for s in features_list.0 { match >::as_ref(&s) { #[cfg(target_arch = "x86_64")] From c995b72384ea532e237878beb65be692923254b7 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 15 Aug 2025 09:00:01 +0200 Subject: [PATCH 172/294] build: treewide: clippy: collapse nested ifs, use let chains This bumps the MSRV to 1.88 (also, Rust edition 2024 is mandatory). Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- api_client/src/lib.rs | 11 +- arch/src/aarch64/fdt.rs | 66 +++---- arch/src/x86_64/mod.rs | 49 ++--- block/src/qcow/mod.rs | 33 ++-- block/src/qcow/vec_cache.rs | 8 +- block/src/vhdx/mod.rs | 10 +- build.rs | 15 +- devices/src/tpm.rs | 5 +- hypervisor/src/arch/x86/emulator/mod.rs | 10 +- hypervisor/src/mshv/x86_64/emulator.rs | 24 +-- net_util/src/tap.rs | 10 +- pci/src/configuration.rs | 34 ++-- pci/src/msi.rs | 12 +- pci/src/vfio.rs | 180 +++++++++--------- pci/src/vfio_user.rs | 16 +- rate_limiter/src/group.rs | 8 +- src/main.rs | 8 +- test_infra/src/lib.rs | 24 +-- virtio-devices/src/console.rs | 19 +- virtio-devices/src/iommu.rs | 25 ++- virtio-devices/src/mem.rs | 8 +- virtio-devices/src/net.rs | 8 +- virtio-devices/src/thread_helper.rs | 12 +- .../src/transport/pci_common_config.rs | 52 +++-- virtio-devices/src/transport/pci_device.rs | 21 +- virtio-devices/src/vhost_user/blk.rs | 35 ++-- virtio-devices/src/vhost_user/fs.rs | 18 +- virtio-devices/src/vhost_user/net.rs | 35 ++-- .../src/vhost_user/vu_common_ctrl.rs | 17 +- virtio-devices/src/vsock/unix/muxer.rs | 8 +- virtio-devices/src/vsock/unix/muxer_killq.rs | 9 +- vm-allocator/src/address.rs | 8 +- vmm/src/config.rs | 145 +++++++------- vmm/src/cpu.rs | 13 +- vmm/src/device_manager.rs | 141 +++++++------- vmm/src/igvm/igvm_loader.rs | 10 +- vmm/src/lib.rs | 29 ++- vmm/src/memory_manager.rs | 32 ++-- vmm/src/serial_manager.rs | 12 +- vmm/src/vm.rs | 12 +- 40 files changed, 579 insertions(+), 613 deletions(-) diff --git a/api_client/src/lib.rs b/api_client/src/lib.rs index 52e85a3367..0ee7fa1d0b 100644 --- a/api_client/src/lib.rs +++ b/api_client/src/lib.rs @@ -118,12 +118,11 @@ fn parse_http_response(socket: &mut dyn Read) -> Result, Error> { } } - if let Some(body_offset) = body_offset { - if let Some(content_length) = content_length { - if res.len() >= content_length + body_offset { - break; - } - } + if let Some(body_offset) = body_offset + && let Some(content_length) = content_length + && res.len() >= content_length + body_offset + { + break; } } let body_string = content_length.and(body_offset.map(|o| 
String::from(&res[o..]))); diff --git a/arch/src/aarch64/fdt.rs b/arch/src/aarch64/fdt.rs index e6b9722270..2755012440 100644 --- a/arch/src/aarch64/fdt.rs +++ b/arch/src/aarch64/fdt.rs @@ -999,39 +999,39 @@ fn create_pci_nodes( fdt.property_array_u32("msi-map", &msi_map)?; fdt.property_u32("msi-parent", MSI_PHANDLE)?; - if pci_device_info_elem.pci_segment_id == 0 { - if let Some(virtio_iommu_bdf) = virtio_iommu_bdf { - // See kernel document Documentation/devicetree/bindings/pci/pci-iommu.txt - // for 'iommu-map' attribute setting. - let iommu_map = [ - 0_u32, - VIRTIO_IOMMU_PHANDLE, - 0_u32, - virtio_iommu_bdf, - virtio_iommu_bdf + 1, - VIRTIO_IOMMU_PHANDLE, - virtio_iommu_bdf + 1, - 0xffff - virtio_iommu_bdf, - ]; - fdt.property_array_u32("iommu-map", &iommu_map)?; - - // See kernel document Documentation/devicetree/bindings/virtio/iommu.txt - // for virtio-iommu node settings. - let virtio_iommu_node_name = format!("virtio_iommu@{virtio_iommu_bdf:x}"); - let virtio_iommu_node = fdt.begin_node(&virtio_iommu_node_name)?; - fdt.property_u32("#iommu-cells", 1)?; - fdt.property_string("compatible", "virtio,pci-iommu")?; - - // 'reg' is a five-cell address encoded as - // (phys.hi phys.mid phys.lo size.hi size.lo). phys.hi should contain the - // device's BDF as 0b00000000 bbbbbbbb dddddfff 00000000. The other cells - // should be zero. - let reg = [virtio_iommu_bdf << 8, 0_u32, 0_u32, 0_u32, 0_u32]; - fdt.property_array_u32("reg", ®)?; - fdt.property_u32("phandle", VIRTIO_IOMMU_PHANDLE)?; - - fdt.end_node(virtio_iommu_node)?; - } + if pci_device_info_elem.pci_segment_id == 0 + && let Some(virtio_iommu_bdf) = virtio_iommu_bdf + { + // See kernel document Documentation/devicetree/bindings/pci/pci-iommu.txt + // for 'iommu-map' attribute setting. + let iommu_map = [ + 0_u32, + VIRTIO_IOMMU_PHANDLE, + 0_u32, + virtio_iommu_bdf, + virtio_iommu_bdf + 1, + VIRTIO_IOMMU_PHANDLE, + virtio_iommu_bdf + 1, + 0xffff - virtio_iommu_bdf, + ]; + fdt.property_array_u32("iommu-map", &iommu_map)?; + + // See kernel document Documentation/devicetree/bindings/virtio/iommu.txt + // for virtio-iommu node settings. + let virtio_iommu_node_name = format!("virtio_iommu@{virtio_iommu_bdf:x}"); + let virtio_iommu_node = fdt.begin_node(&virtio_iommu_node_name)?; + fdt.property_u32("#iommu-cells", 1)?; + fdt.property_string("compatible", "virtio,pci-iommu")?; + + // 'reg' is a five-cell address encoded as + // (phys.hi phys.mid phys.lo size.hi size.lo). phys.hi should contain the + // device's BDF as 0b00000000 bbbbbbbb dddddfff 00000000. The other cells + // should be zero. + let reg = [virtio_iommu_bdf << 8, 0_u32, 0_u32, 0_u32, 0_u32]; + fdt.property_array_u32("reg", ®)?; + fdt.property_u32("phandle", VIRTIO_IOMMU_PHANDLE)?; + + fdt.end_node(virtio_iommu_node)?; } fdt.end_node(pci_node)?; diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index 71edd4508e..648220e070 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -810,31 +810,22 @@ pub fn configure_vcpu( ); // The TSC frequency CPUID leaf should not be included when running with HyperV emulation - if !kvm_hyperv { - if let Some(tsc_khz) = vcpu.tsc_khz().map_err(Error::GetTscFrequency)? 
{ - // Need to check that the TSC doesn't vary with dynamic frequency - // SAFETY: cpuid called with valid leaves - if unsafe { std::arch::x86_64::__cpuid(0x8000_0007) }.edx - & (1u32 << INVARIANT_TSC_EDX_BIT) - > 0 - { - CpuidPatch::set_cpuid_reg( - &mut cpuid, - 0x4000_0000, - None, - CpuidReg::EAX, - 0x4000_0010, - ); - cpuid.retain(|c| c.function != 0x4000_0010); - cpuid.push(CpuIdEntry { - function: 0x4000_0010, - eax: tsc_khz, - ebx: 1000000, /* LAPIC resolution of 1ns (freq: 1GHz) is hardcoded in KVM's - * APIC_BUS_CYCLE_NS */ - ..Default::default() - }); - }; - } + if !kvm_hyperv && let Some(tsc_khz) = vcpu.tsc_khz().map_err(Error::GetTscFrequency)? { + // Need to check that the TSC doesn't vary with dynamic frequency + // SAFETY: cpuid called with valid leaves + if unsafe { std::arch::x86_64::__cpuid(0x8000_0007) }.edx & (1u32 << INVARIANT_TSC_EDX_BIT) + > 0 + { + CpuidPatch::set_cpuid_reg(&mut cpuid, 0x4000_0000, None, CpuidReg::EAX, 0x4000_0010); + cpuid.retain(|c| c.function != 0x4000_0010); + cpuid.push(CpuIdEntry { + function: 0x4000_0010, + eax: tsc_khz, + ebx: 1000000, /* LAPIC resolution of 1ns (freq: 1GHz) is hardcoded in KVM's + * APIC_BUS_CYCLE_NS */ + ..Default::default() + }); + }; } for c in &cpuid { @@ -932,10 +923,10 @@ pub fn configure_system( mptable::setup_mptable(offset, guest_mem, _num_cpus, topology).map_err(Error::MpTableSetup)?; // Check that the RAM is not smaller than the RSDP start address - if let Some(rsdp_addr) = rsdp_addr { - if rsdp_addr.0 > guest_mem.last_addr().0 { - return Err(super::Error::RsdpPastRamEnd); - } + if let Some(rsdp_addr) = rsdp_addr + && rsdp_addr.0 > guest_mem.last_addr().0 + { + return Err(super::Error::RsdpPastRamEnd); } match setup_header { diff --git a/block/src/qcow/mod.rs b/block/src/qcow/mod.rs index da68146919..14deafc856 100644 --- a/block/src/qcow/mod.rs +++ b/block/src/qcow/mod.rs @@ -287,11 +287,12 @@ impl QcowHeader { let cluster_bits: u32 = DEFAULT_CLUSTER_BITS; let cluster_size: u32 = 0x01 << cluster_bits; let max_length: usize = (cluster_size - header_size) as usize; - if let Some(path) = backing_file { - if path.len() > max_length { - return Err(Error::BackingFileTooLong(path.len() - max_length)); - } + if let Some(path) = backing_file + && path.len() > max_length + { + return Err(Error::BackingFileTooLong(path.len() - max_length)); } + // L2 blocks are always one cluster long. They contain cluster_size/sizeof(u64) addresses. 
let entries_per_cluster: u32 = cluster_size / size_of::() as u32; let num_clusters: u32 = div_round_up_u64(size, u64::from(cluster_size)) as u32; @@ -589,14 +590,12 @@ impl QcowFile { // Check for compressed blocks for l2_addr_disk in l1_table.get_values() { - if *l2_addr_disk != 0 { - if let Err(e) = Self::read_l2_cluster(&mut raw_file, *l2_addr_disk) { - if let Some(os_error) = e.raw_os_error() { - if os_error == ENOTSUP { - return Err(Error::CompressedBlocksNotSupported); - } - } - } + if *l2_addr_disk != 0 + && let Err(e) = Self::read_l2_cluster(&mut raw_file, *l2_addr_disk) + && let Some(os_error) = e.raw_os_error() + && os_error == ENOTSUP + { + return Err(Error::CompressedBlocksNotSupported); } } @@ -1584,11 +1583,11 @@ impl Seek for QcowFile { } }; - if let Some(o) = new_offset { - if o <= self.virtual_size() { - self.current_offset = o; - return Ok(o); - } + if let Some(o) = new_offset + && o <= self.virtual_size() + { + self.current_offset = o; + return Ok(o); } Err(std::io::Error::from_raw_os_error(EINVAL)) } diff --git a/block/src/qcow/vec_cache.rs b/block/src/qcow/vec_cache.rs index 76e5d44430..4b18518b0e 100644 --- a/block/src/qcow/vec_cache.rs +++ b/block/src/qcow/vec_cache.rs @@ -123,10 +123,10 @@ impl CacheMap { if self.map.len() == self.capacity { // TODO(dgreid) - smarter eviction strategy. let to_evict = *self.map.iter().next().unwrap().0; - if let Some(evicted) = self.map.remove(&to_evict) { - if evicted.dirty() { - write_callback(to_evict, evicted)?; - } + if let Some(evicted) = self.map.remove(&to_evict) + && evicted.dirty() + { + write_callback(to_evict, evicted)?; } } self.map.insert(index, block); diff --git a/block/src/vhdx/mod.rs b/block/src/vhdx/mod.rs index d46905d934..141c43c6d5 100644 --- a/block/src/vhdx/mod.rs +++ b/block/src/vhdx/mod.rs @@ -187,11 +187,11 @@ impl Seek for Vhdx { } }; - if let Some(o) = new_offset { - if o <= self.virtual_disk_size() { - self.current_offset = o; - return Ok(o); - } + if let Some(o) = new_offset + && o <= self.virtual_disk_size() + { + self.current_offset = o; + return Ok(o); } Err(std::io::Error::new( diff --git a/build.rs b/build.rs index 37a5ffd9fa..080c625599 100644 --- a/build.rs +++ b/build.rs @@ -9,14 +9,13 @@ use std::process::Command; fn main() { let mut version = "v".to_owned() + env!("CARGO_PKG_VERSION"); - if let Ok(git_out) = Command::new("git").args(["describe", "--dirty"]).output() { - if git_out.status.success() { - if let Ok(git_out_str) = String::from_utf8(git_out.stdout) { - version = git_out_str; - // Pop the trailing newline. - version.pop(); - } - } + if let Ok(git_out) = Command::new("git").args(["describe", "--dirty"]).output() + && git_out.status.success() + && let Ok(git_out_str) = String::from_utf8(git_out.stdout) + { + version = git_out_str; + // Pop the trailing newline. + version.pop(); } // Append CH_EXTRA_VERSION to version if it is set. diff --git a/devices/src/tpm.rs b/devices/src/tpm.rs index 4219f58308..72fef3e539 100644 --- a/devices/src/tpm.rs +++ b/devices/src/tpm.rs @@ -458,10 +458,9 @@ impl BusDevice for Tpm { CRB_CTRL_CANCEL => { if v == CRB_CANCEL_INVOKE && (self.regs[CRB_CTRL_START as usize] & CRB_START_INVOKE != 0) + && let Err(e) = self.emulator.cancel_cmd() { - if let Err(e) = self.emulator.cancel_cmd() { - error!("Failed to run cancel command. Error: {:?}", e); - } + error!("Failed to run cancel command. 
Error: {:?}", e); } } CRB_CTRL_START => { diff --git a/hypervisor/src/arch/x86/emulator/mod.rs b/hypervisor/src/arch/x86/emulator/mod.rs index 9cc8ac4721..778fd47f57 100644 --- a/hypervisor/src/arch/x86/emulator/mod.rs +++ b/hypervisor/src/arch/x86/emulator/mod.rs @@ -624,11 +624,11 @@ impl Emulator<'_, T> { last_decoded_ip = decoder.ip(); num_insn_emulated += 1; - if let Some(num_insn) = num_insn { - if num_insn_emulated >= num_insn { - // Exit the decoding loop, do not decode the next instruction. - stop_emulation = true; - } + if let Some(num_insn) = num_insn + && num_insn_emulated >= num_insn + { + // Exit the decoding loop, do not decode the next instruction. + stop_emulation = true; } } diff --git a/hypervisor/src/mshv/x86_64/emulator.rs b/hypervisor/src/mshv/x86_64/emulator.rs index a8f38ba86f..4ecdee2a42 100644 --- a/hypervisor/src/mshv/x86_64/emulator.rs +++ b/hypervisor/src/mshv/x86_64/emulator.rs @@ -44,12 +44,12 @@ impl MshvEmulatorContext<'_> { gpa ); - if let Some(vm_ops) = &self.vcpu.vm_ops { - if vm_ops.guest_mem_read(gpa, data).is_err() { - vm_ops - .mmio_read(gpa, data) - .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?; - } + if let Some(vm_ops) = &self.vcpu.vm_ops + && vm_ops.guest_mem_read(gpa, data).is_err() + { + vm_ops + .mmio_read(gpa, data) + .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?; } Ok(()) @@ -94,12 +94,12 @@ impl MshvEmulatorContext<'_> { gpa ); - if let Some(vm_ops) = &self.vcpu.vm_ops { - if vm_ops.guest_mem_write(gpa, data).is_err() { - vm_ops - .mmio_write(gpa, data) - .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?; - } + if let Some(vm_ops) = &self.vcpu.vm_ops + && vm_ops.guest_mem_write(gpa, data).is_err() + { + vm_ops + .mmio_write(gpa, data) + .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?; } Ok(()) diff --git a/net_util/src/tap.rs b/net_util/src/tap.rs index 2544b9eee7..2916e66cf6 100644 --- a/net_util/src/tap.rs +++ b/net_util/src/tap.rs @@ -866,11 +866,11 @@ mod tests { let p = ParsedPkt::new(buf); p.print(); - if let Some(ref udp) = p.udp { - if payload == udp.payload() { - channel_tx.send(true).unwrap(); - break; - } + if let Some(ref udp) = p.udp + && payload == udp.payload() + { + channel_tx.send(true).unwrap(); + break; } } }); diff --git a/pci/src/configuration.rs b/pci/src/configuration.rs index 7264f7caf1..706947050c 100644 --- a/pci/src/configuration.rs +++ b/pci/src/configuration.rs @@ -828,10 +828,10 @@ impl PciConfiguration { let mut addr = u64::from(self.bars[bar_num].addr & self.writable_bits[bar_idx]); - if let Some(bar_type) = self.bars[bar_num].r#type { - if bar_type == PciBarRegionType::Memory64BitRegion { - addr |= u64::from(self.bars[bar_num + 1].addr) << 32; - } + if let Some(bar_type) = self.bars[bar_num].r#type + && bar_type == PciBarRegionType::Memory64BitRegion + { + addr |= u64::from(self.bars[bar_num + 1].addr) << 32; } addr @@ -907,19 +907,19 @@ impl PciConfiguration { } // Handle potential write to MSI-X message control register - if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx { - if let Some(msix_config) = &self.msix_config { - if msix_cap_reg_idx == reg_idx && offset == 2 && data.len() == 2 { - msix_config - .lock() - .unwrap() - .set_msg_ctl(LittleEndian::read_u16(data)); - } else if msix_cap_reg_idx == reg_idx && offset == 0 && data.len() == 4 { - msix_config - .lock() - .unwrap() - .set_msg_ctl((LittleEndian::read_u32(data) >> 16) as u16); - } + if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx + && let Some(msix_config) = &self.msix_config + { + if 
msix_cap_reg_idx == reg_idx && offset == 2 && data.len() == 2 { + msix_config + .lock() + .unwrap() + .set_msg_ctl(LittleEndian::read_u16(data)); + } else if msix_cap_reg_idx == reg_idx && offset == 0 && data.len() == 4 { + msix_config + .lock() + .unwrap() + .set_msg_ctl((LittleEndian::read_u32(data) >> 16) as u16); } } diff --git a/pci/src/msi.rs b/pci/src/msi.rs index ebb7aa3e90..a0215dcdd3 100644 --- a/pci/src/msi.rs +++ b/pci/src/msi.rs @@ -271,15 +271,11 @@ impl MsiConfig { } } - if !old_enabled { - if let Err(e) = self.interrupt_source_group.enable() { - error!("Failed enabling irq_fd: {:?}", e); - } - } - } else if old_enabled { - if let Err(e) = self.interrupt_source_group.disable() { - error!("Failed disabling irq_fd: {:?}", e); + if !old_enabled && let Err(e) = self.interrupt_source_group.enable() { + error!("Failed enabling irq_fd: {:?}", e); } + } else if old_enabled && let Err(e) = self.interrupt_source_group.disable() { + error!("Failed disabling irq_fd: {:?}", e); } } } diff --git a/pci/src/vfio.rs b/pci/src/vfio.rs index 97be4a7bc1..8372046acf 100644 --- a/pci/src/vfio.rs +++ b/pci/src/vfio.rs @@ -208,21 +208,20 @@ impl Interrupt { } fn accessed(&self, offset: u64) -> Option<(PciCapabilityId, u64)> { - if let Some(msi) = &self.msi { - if offset >= u64::from(msi.cap_offset) - && offset < u64::from(msi.cap_offset) + msi.cfg.size() - { - return Some(( - PciCapabilityId::MessageSignalledInterrupts, - u64::from(msi.cap_offset), - )); - } + if let Some(msi) = &self.msi + && offset >= u64::from(msi.cap_offset) + && offset < u64::from(msi.cap_offset) + msi.cfg.size() + { + return Some(( + PciCapabilityId::MessageSignalledInterrupts, + u64::from(msi.cap_offset), + )); } - if let Some(msix) = &self.msix { - if offset == u64::from(msix.cap_offset) { - return Some((PciCapabilityId::MsiX, u64::from(msix.cap_offset))); - } + if let Some(msix) = &self.msix + && offset == u64::from(msix.cap_offset) + { + return Some((PciCapabilityId::MsiX, u64::from(msix.cap_offset))); } None @@ -603,13 +602,12 @@ impl VfioCommon { type_, .. } = resource + && *index == bar_id as usize { - if *index == bar_id as usize { - restored_bar_addr = Some(GuestAddress(*base)); - region_size = *size; - region_type = PciBarRegionType::from(*type_); - break; - } + restored_bar_addr = Some(GuestAddress(*base)); + region_size = *size; + region_type = PciBarRegionType::from(*type_); + break; } } if restored_bar_addr.is_none() { @@ -925,24 +923,23 @@ impl VfioCommon { match PciCapabilityId::from(cap_id) { PciCapabilityId::MessageSignalledInterrupts => { - if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_MSI_IRQ_INDEX) { - if irq_info.count > 0 { - // Parse capability only if the VFIO device - // supports MSI. - let msg_ctl = self.parse_msi_capabilities(cap_iter); - self.initialize_msi(msg_ctl, cap_iter as u32, None); - } + if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_MSI_IRQ_INDEX) + && irq_info.count > 0 + { + // Parse capability only if the VFIO device + // supports MSI. + let msg_ctl = self.parse_msi_capabilities(cap_iter); + self.initialize_msi(msg_ctl, cap_iter as u32, None); } } PciCapabilityId::MsiX => { if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_MSIX_IRQ_INDEX) + && irq_info.count > 0 { - if irq_info.count > 0 { - // Parse capability only if the VFIO device - // supports MSI-X. 
- let msix_cap = self.parse_msix_capabilities(cap_iter); - self.initialize_msix(msix_cap, cap_iter as u32, bdf, None); - } + // Parse capability only if the VFIO device + // supports MSI-X. + let msix_cap = self.parse_msix_capabilities(cap_iter); + self.initialize_msix(msix_cap, cap_iter as u32, bdf, None); } } PciCapabilityId::PciExpress => pci_express_cap_found = true, @@ -1038,17 +1035,17 @@ impl VfioCommon { } pub(crate) fn enable_intx(&mut self) -> Result<(), VfioPciError> { - if let Some(intx) = &mut self.interrupt.intx { - if !intx.enabled { - if let Some(eventfd) = intx.interrupt_source_group.notifier(0) { - self.vfio_wrapper - .enable_irq(VFIO_PCI_INTX_IRQ_INDEX, vec![&eventfd]) - .map_err(VfioPciError::EnableIntx)?; + if let Some(intx) = &mut self.interrupt.intx + && !intx.enabled + { + if let Some(eventfd) = intx.interrupt_source_group.notifier(0) { + self.vfio_wrapper + .enable_irq(VFIO_PCI_INTX_IRQ_INDEX, vec![&eventfd]) + .map_err(VfioPciError::EnableIntx)?; - intx.enabled = true; - } else { - return Err(VfioPciError::MissingNotifier); - } + intx.enabled = true; + } else { + return Err(VfioPciError::MissingNotifier); } } @@ -1056,13 +1053,13 @@ impl VfioCommon { } pub(crate) fn disable_intx(&mut self) { - if let Some(intx) = &mut self.interrupt.intx { - if intx.enabled { - if let Err(e) = self.vfio_wrapper.disable_irq(VFIO_PCI_INTX_IRQ_INDEX) { - error!("Could not disable INTx: {}", e); - } else { - intx.enabled = false; - } + if let Some(intx) = &mut self.interrupt.intx + && intx.enabled + { + if let Err(e) = self.vfio_wrapper.disable_irq(VFIO_PCI_INTX_IRQ_INDEX) { + error!("Could not disable INTx: {}", e); + } else { + intx.enabled = false; } } } @@ -1118,12 +1115,12 @@ impl VfioCommon { } fn initialize_legacy_interrupt(&mut self) -> Result<(), VfioPciError> { - if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_INTX_IRQ_INDEX) { - if irq_info.count == 0 { - // A count of 0 means the INTx IRQ is not supported, therefore - // it shouldn't be initialized. - return Ok(()); - } + if let Some(irq_info) = self.vfio_wrapper.get_irq_info(VFIO_PCI_INTX_IRQ_INDEX) + && irq_info.count == 0 + { + // A count of 0 means the INTx IRQ is not supported, therefore + // it shouldn't be initialized. + return Ok(()); } if let Some(interrupt_source_group) = self.legacy_interrupt_group.clone() { @@ -1200,10 +1197,10 @@ impl VfioCommon { // INTx EOI // The guest reading from the BAR potentially means the interrupt has // been received and can be acknowledged. - if self.interrupt.intx_in_use() { - if let Err(e) = self.vfio_wrapper.unmask_irq(VFIO_PCI_INTX_IRQ_INDEX) { - error!("Failed unmasking INTx IRQ: {}", e); - } + if self.interrupt.intx_in_use() + && let Err(e) = self.vfio_wrapper.unmask_irq(VFIO_PCI_INTX_IRQ_INDEX) + { + error!("Failed unmasking INTx IRQ: {}", e); } } @@ -1228,10 +1225,10 @@ impl VfioCommon { // INTx EOI // The guest writing to the BAR potentially means the interrupt has // been received and can be acknowledged. - if self.interrupt.intx_in_use() { - if let Err(e) = self.vfio_wrapper.unmask_irq(VFIO_PCI_INTX_IRQ_INDEX) { - error!("Failed unmasking INTx IRQ: {}", e); - } + if self.interrupt.intx_in_use() + && let Err(e) = self.vfio_wrapper.unmask_irq(VFIO_PCI_INTX_IRQ_INDEX) + { + error!("Failed unmasking INTx IRQ: {}", e); } None @@ -1619,12 +1616,11 @@ impl VfioPciDevice { // Don't try to mmap the region if it contains MSI-X table or // MSI-X PBA subregion, and if we couldn't find MSIX_MAPPABLE // in the list of supported capabilities. 
- if let Some(msix) = self.common.interrupt.msix.as_ref() { - if (region.index == msix.cap.table_bir() || region.index == msix.cap.pba_bir()) - && !caps.contains(&VfioRegionInfoCap::MsixMappable) - { - continue; - } + if let Some(msix) = self.common.interrupt.msix.as_ref() + && (region.index == msix.cap.table_bir() || region.index == msix.cap.pba_bir()) + && !caps.contains(&VfioRegionInfoCap::MsixMappable) + { + continue; } let mmap_size = self.device.get_region_size(region.index); @@ -1713,18 +1709,17 @@ impl VfioPciDevice { for region in self.common.mmio_regions.iter() { for user_memory_region in region.user_memory_regions.iter() { // Unmap from vfio container - if !self.iommu_attached { - if let Err(e) = self + if !self.iommu_attached + && let Err(e) = self .container .vfio_dma_unmap(user_memory_region.start, user_memory_region.size) .map_err(|e| VfioPciError::DmaUnmap(e, self.device_path.clone(), self.bdf)) - { - error!( - "Could not unmap mmio region from vfio container: \ + { + error!( + "Could not unmap mmio region from vfio container: \ iova 0x{:x}, size 0x{:x}: {}, ", - user_memory_region.start, user_memory_region.size, e - ); - } + user_memory_region.start, user_memory_region.size, e + ); } // Remove region @@ -1791,16 +1786,16 @@ impl Drop for VfioPciDevice { fn drop(&mut self) { self.unmap_mmio_regions(); - if let Some(msix) = &self.common.interrupt.msix { - if msix.bar.enabled() { - self.common.disable_msix(); - } + if let Some(msix) = &self.common.interrupt.msix + && msix.bar.enabled() + { + self.common.disable_msix(); } - if let Some(msi) = &self.common.interrupt.msi { - if msi.cfg.enabled() { - self.common.disable_msi() - } + if let Some(msi) = &self.common.interrupt.msi + && msi.cfg.enabled() + { + self.common.disable_msi() } if self.common.interrupt.intx_in_use() { @@ -1898,20 +1893,19 @@ impl PciDevice for VfioPciDevice { for user_memory_region in region.user_memory_regions.iter_mut() { // Unmap the old MMIO region from vfio container - if !self.iommu_attached { - if let Err(e) = self + if !self.iommu_attached + && let Err(e) = self .container .vfio_dma_unmap(user_memory_region.start, user_memory_region.size) .map_err(|e| { VfioPciError::DmaUnmap(e, self.device_path.clone(), self.bdf) }) - { - error!( - "Could not unmap mmio region from vfio container: \ + { + error!( + "Could not unmap mmio region from vfio container: \ iova 0x{:x}, size 0x{:x}: {}, ", - user_memory_region.start, user_memory_region.size, e - ); - } + user_memory_region.start, user_memory_region.size, e + ); } // Remove old region diff --git a/pci/src/vfio_user.rs b/pci/src/vfio_user.rs index 7ca1d28814..e92c719964 100644 --- a/pci/src/vfio_user.rs +++ b/pci/src/vfio_user.rs @@ -505,16 +505,16 @@ impl Drop for VfioUserPciDevice { fn drop(&mut self) { self.unmap_mmio_regions(); - if let Some(msix) = &self.common.interrupt.msix { - if msix.bar.enabled() { - self.common.disable_msix(); - } + if let Some(msix) = &self.common.interrupt.msix + && msix.bar.enabled() + { + self.common.disable_msix(); } - if let Some(msi) = &self.common.interrupt.msi { - if msi.cfg.enabled() { - self.common.disable_msi() - } + if let Some(msi) = &self.common.interrupt.msi + && msi.cfg.enabled() + { + self.common.disable_msi() } if self.common.interrupt.intx_in_use() { diff --git a/rate_limiter/src/group.rs b/rate_limiter/src/group.rs index f883115641..a986a7f581 100644 --- a/rate_limiter/src/group.rs +++ b/rate_limiter/src/group.rs @@ -288,10 +288,10 @@ impl Drop for RateLimiterGroup { fn drop(&mut self) { 
self.kill_evt.write(1).unwrap(); - if let Some(t) = self.epoll_thread.take() { - if let Err(e) = t.join() { - error!("Error joining thread: {:?}", e); - } + if let Some(t) = self.epoll_thread.take() + && let Err(e) = t.join() + { + error!("Error joining thread: {:?}", e); } } } diff --git a/src/main.rs b/src/main.rs index 3c94f36d88..7bdcbfac8b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -754,10 +754,10 @@ fn start_vmm(cmd_arguments: ArgMatches) -> Result, Error> { Ok(()) })(); - if r.is_err() { - if let Err(e) = exit_evt.write(1) { - warn!("writing to exit EventFd: {e}"); - } + if r.is_err() + && let Err(e) = exit_evt.write(1) + { + warn!("writing to exit EventFd: {e}"); } if landlock_enable { diff --git a/test_infra/src/lib.rs b/test_infra/src/lib.rs index 641c5a15ab..7dceaecce1 100644 --- a/test_infra/src/lib.rs +++ b/test_infra/src/lib.rs @@ -481,13 +481,14 @@ pub fn rate_limited_copy, Q: AsRef>(from: P, to: Q) -> io:: match fs::copy(&from, &to) { Err(e) => { - if let Some(errno) = e.raw_os_error() { - if errno == libc::ENOSPC { - eprintln!("Copy returned ENOSPC. Attempt {i} of 10. Sleeping."); - thread::sleep(std::time::Duration::new(60, 0)); - continue; - } + if let Some(errno) = e.raw_os_error() + && errno == libc::ENOSPC + { + eprintln!("Copy returned ENOSPC. Attempt {i} of 10. Sleeping."); + thread::sleep(std::time::Duration::new(60, 0)); + continue; } + return Err(e); } Ok(i) => return Ok(i), @@ -1094,12 +1095,11 @@ impl Guest { let vendors: Vec<&str> = vendors.split('\n').collect(); for (index, d_id) in devices.iter().enumerate() { - if *d_id == device_id { - if let Some(v_id) = vendors.get(index) { - if *v_id == vendor_id { - return Ok(true); - } - } + if *d_id == device_id + && let Some(v_id) = vendors.get(index) + && *v_id == vendor_id + { + return Ok(true); } } diff --git a/virtio-devices/src/console.rs b/virtio-devices/src/console.rs index 2c0e250a4d..6f237b5eed 100644 --- a/virtio-devices/src/console.rs +++ b/virtio-devices/src/console.rs @@ -454,12 +454,11 @@ impl EpollHelperHandler for ConsoleEpollHandler { } if self.endpoint.is_pty() { self.file_event_registered = false; - if event.events & libc::EPOLLHUP as u32 != 0 { - if let Some(pty_write_out) = &self.write_out { - if pty_write_out.load(Ordering::Acquire) { - pty_write_out.store(false, Ordering::Release); - } - } + if event.events & libc::EPOLLHUP as u32 != 0 + && let Some(pty_write_out) = &self.write_out + && pty_write_out.load(Ordering::Acquire) + { + pty_write_out.store(false, Ordering::Release); } else { // If the EPOLLHUP flag is not up on the associated event, we // can assume the other end of the PTY is connected and therefore @@ -731,10 +730,10 @@ impl VirtioDevice for Console { .acked_features .store(self.common.acked_features, Ordering::Relaxed); - if self.common.feature_acked(VIRTIO_CONSOLE_F_SIZE) { - if let Err(e) = interrupt_cb.trigger(VirtioInterruptType::Config) { - error!("Failed to signal console driver: {:?}", e); - } + if self.common.feature_acked(VIRTIO_CONSOLE_F_SIZE) + && let Err(e) = interrupt_cb.trigger(VirtioInterruptType::Config) + { + error!("Failed to signal console driver: {:?}", e); } let (kill_evt, pause_evt) = self.common.dup_eventfds(); diff --git a/virtio-devices/src/iommu.rs b/virtio-devices/src/iommu.rs index 71c10c1d0c..f2795a8cff 100644 --- a/virtio-devices/src/iommu.rs +++ b/virtio-devices/src/iommu.rs @@ -421,13 +421,12 @@ impl Request { // If any other mappings exist in the domain for other containers, // make sure to issue these mappings for the new endpoint/container if 
let Some(domain_mappings) = &mapping.domains.read().unwrap().get(&domain_id) + && let Some(ext_map) = ext_mapping.get(&endpoint) { - if let Some(ext_map) = ext_mapping.get(&endpoint) { - for (virt_start, addr_map) in &domain_mappings.mappings { - ext_map - .map(*virt_start, addr_map.gpa, addr_map.size) - .map_err(Error::ExternalUnmapping)?; - } + for (virt_start, addr_map) in &domain_mappings.mappings { + ext_map + .map(*virt_start, addr_map.gpa, addr_map.size) + .map_err(Error::ExternalUnmapping)?; } } @@ -654,13 +653,13 @@ fn detach_endpoint_from_domain( mapping.endpoints.write().unwrap().remove(&endpoint); // Trigger external unmapping for the endpoint if necessary. - if let Some(domain_mappings) = &mapping.domains.read().unwrap().get(&domain_id) { - if let Some(ext_map) = ext_mapping.get(&endpoint) { - for (virt_start, addr_map) in &domain_mappings.mappings { - ext_map - .unmap(*virt_start, addr_map.size) - .map_err(Error::ExternalUnmapping)?; - } + if let Some(domain_mappings) = &mapping.domains.read().unwrap().get(&domain_id) + && let Some(ext_map) = ext_mapping.get(&endpoint) + { + for (virt_start, addr_map) in &domain_mappings.mappings { + ext_map + .unmap(*virt_start, addr_map.size) + .map_err(Error::ExternalUnmapping)?; } } diff --git a/virtio-devices/src/mem.rs b/virtio-devices/src/mem.rs index 6971f07151..7893be6b1a 100644 --- a/virtio-devices/src/mem.rs +++ b/virtio-devices/src/mem.rs @@ -475,11 +475,9 @@ impl MemEpollHandler { return VIRTIO_MEM_RESP_ERROR; } - if !plug { - if let Err(e) = self.discard_memory_range(offset, size) { - error!("failed discarding memory range: {:?}", e); - return VIRTIO_MEM_RESP_ERROR; - } + if !plug && let Err(e) = self.discard_memory_range(offset, size) { + error!("failed discarding memory range: {:?}", e); + return VIRTIO_MEM_RESP_ERROR; } self.blocks_state diff --git a/virtio-devices/src/net.rs b/virtio-devices/src/net.rs index 4518321ecf..bbe0a8b37f 100644 --- a/virtio-devices/src/net.rs +++ b/virtio-devices/src/net.rs @@ -667,10 +667,10 @@ impl Drop for Net { } // Needed to ensure all references to tap FDs are dropped (#4868) self.common.wait_for_epoll_threads(); - if let Some(thread) = self.ctrl_queue_epoll_thread.take() { - if let Err(e) = thread.join() { - error!("Error joining thread: {:?}", e); - } + if let Some(thread) = self.ctrl_queue_epoll_thread.take() + && let Err(e) = thread.join() + { + error!("Error joining thread: {:?}", e); } } } diff --git a/virtio-devices/src/thread_helper.rs b/virtio-devices/src/thread_helper.rs index d4df6c86b6..74aaddf8d4 100644 --- a/virtio-devices/src/thread_helper.rs +++ b/virtio-devices/src/thread_helper.rs @@ -36,12 +36,12 @@ where thread::Builder::new() .name(name.to_string()) .spawn(move || { - if !seccomp_filter.is_empty() { - if let Err(e) = apply_filter(&seccomp_filter) { - error!("Error applying seccomp filter: {:?}", e); - thread_exit_evt.write(1).ok(); - return; - } + if !seccomp_filter.is_empty() + && let Err(e) = apply_filter(&seccomp_filter) + { + error!("Error applying seccomp filter: {:?}", e); + thread_exit_evt.write(1).ok(); + return; } match std::panic::catch_unwind(AssertUnwindSafe(f)) { Err(_) => { diff --git a/virtio-devices/src/transport/pci_common_config.rs b/virtio-devices/src/transport/pci_common_config.rs index c87e36e92b..549453a5d6 100644 --- a/virtio-devices/src/transport/pci_common_config.rs +++ b/virtio-devices/src/transport/pci_common_config.rs @@ -261,36 +261,28 @@ impl VirtioPciCommonConfig { let ready = value == 1; q.set_ready(ready); // Translate address of 
descriptor table and vrings. - if let Some(access_platform) = &self.access_platform { - if ready { - let desc_table = access_platform - .translate_gva( - q.desc_table(), - get_vring_size(VringType::Desc, q.size()), - ) - .unwrap(); - let avail_ring = access_platform - .translate_gva( - q.avail_ring(), - get_vring_size(VringType::Avail, q.size()), - ) - .unwrap(); - let used_ring = access_platform - .translate_gva(q.used_ring(), get_vring_size(VringType::Used, q.size())) - .unwrap(); - q.set_desc_table_address( - Some((desc_table & 0xffff_ffff) as u32), - Some((desc_table >> 32) as u32), - ); - q.set_avail_ring_address( - Some((avail_ring & 0xffff_ffff) as u32), - Some((avail_ring >> 32) as u32), - ); - q.set_used_ring_address( - Some((used_ring & 0xffff_ffff) as u32), - Some((used_ring >> 32) as u32), - ); - } + if ready && let Some(access_platform) = &self.access_platform { + let desc_table = access_platform + .translate_gva(q.desc_table(), get_vring_size(VringType::Desc, q.size())) + .unwrap(); + let avail_ring = access_platform + .translate_gva(q.avail_ring(), get_vring_size(VringType::Avail, q.size())) + .unwrap(); + let used_ring = access_platform + .translate_gva(q.used_ring(), get_vring_size(VringType::Used, q.size())) + .unwrap(); + q.set_desc_table_address( + Some((desc_table & 0xffff_ffff) as u32), + Some((desc_table >> 32) as u32), + ); + q.set_avail_ring_address( + Some((avail_ring & 0xffff_ffff) as u32), + Some((avail_ring >> 32) as u32), + ); + q.set_used_ring_address( + Some((used_ring & 0xffff_ffff) as u32), + Some((used_ring >> 32) as u32), + ); } }), _ => { diff --git a/virtio-devices/src/transport/pci_device.rs b/virtio-devices/src/transport/pci_device.rs index a16102218e..b05020bb47 100644 --- a/virtio-devices/src/transport/pci_device.rs +++ b/virtio-devices/src/transport/pci_device.rs @@ -968,18 +968,17 @@ impl PciDevice for VirtioPciDevice { if let Resource::PciBar { index, base, type_, .. } = resource + && index == VIRTIO_COMMON_BAR_INDEX { - if index == VIRTIO_COMMON_BAR_INDEX { - settings_bar_addr = Some(GuestAddress(base)); - use_64bit_bar = match type_ { - PciBarType::Io => { - return Err(PciDeviceError::InvalidResource(resource)); - } - PciBarType::Mmio32 => false, - PciBarType::Mmio64 => true, - }; - break; - } + settings_bar_addr = Some(GuestAddress(base)); + use_64bit_bar = match type_ { + PciBarType::Io => { + return Err(PciDeviceError::InvalidResource(resource)); + } + PciBarType::Mmio32 => false, + PciBarType::Mmio64 => true, + }; + break; } } // Error out if no resource was matching the BAR id. 
diff --git a/virtio-devices/src/vhost_user/blk.rs b/virtio-devices/src/vhost_user/blk.rs index d009280a86..7c0e20c1ac 100644 --- a/virtio-devices/src/vhost_user/blk.rs +++ b/virtio-devices/src/vhost_user/blk.rs @@ -213,16 +213,16 @@ impl Blk { impl Drop for Blk { fn drop(&mut self) { - if let Some(kill_evt) = self.common.kill_evt.take() { - if let Err(e) = kill_evt.write(1) { - error!("failed to kill vhost-user-blk: {:?}", e); - } + if let Some(kill_evt) = self.common.kill_evt.take() + && let Err(e) = kill_evt.write(1) + { + error!("failed to kill vhost-user-blk: {:?}", e); } self.common.wait_for_epoll_threads(); - if let Some(thread) = self.epoll_thread.take() { - if let Err(e) = thread.join() { - error!("Error joining thread: {:?}", e); - } + if let Some(thread) = self.epoll_thread.take() + && let Err(e) = thread.join() + { + error!("Error joining thread: {:?}", e); } } } @@ -267,16 +267,15 @@ impl VirtioDevice for Blk { } self.config.writeback = data[0]; - if let Some(vu) = &self.vu_common.vu { - if let Err(e) = vu + if let Some(vu) = &self.vu_common.vu + && let Err(e) = vu .lock() .unwrap() .socket_handle() .set_config(offset as u32, VhostUserConfigFlags::WRITABLE, data) .map_err(Error::VhostUserSetConfig) - { - error!("Failed setting vhost-user-blk configuration: {:?}", e); - } + { + error!("Failed setting vhost-user-blk configuration: {:?}", e); } } @@ -329,11 +328,11 @@ impl VirtioDevice for Blk { self.common.resume().ok()?; } - if let Some(vu) = &self.vu_common.vu { - if let Err(e) = vu.lock().unwrap().reset_vhost_user() { - error!("Failed to reset vhost-user daemon: {:?}", e); - return None; - } + if let Some(vu) = &self.vu_common.vu + && let Err(e) = vu.lock().unwrap().reset_vhost_user() + { + error!("Failed to reset vhost-user daemon: {:?}", e); + return None; } if let Some(kill_evt) = self.common.kill_evt.take() { diff --git a/virtio-devices/src/vhost_user/fs.rs b/virtio-devices/src/vhost_user/fs.rs index 4c70db2f1a..c420bb7a33 100644 --- a/virtio-devices/src/vhost_user/fs.rs +++ b/virtio-devices/src/vhost_user/fs.rs @@ -227,10 +227,10 @@ impl Drop for Fs { let _ = kill_evt.write(1); } self.common.wait_for_epoll_threads(); - if let Some(thread) = self.epoll_thread.take() { - if let Err(e) = thread.join() { - error!("Error joining thread: {:?}", e); - } + if let Some(thread) = self.epoll_thread.take() + && let Err(e) = thread.join() + { + error!("Error joining thread: {:?}", e); } } } @@ -308,11 +308,11 @@ impl VirtioDevice for Fs { self.common.resume().ok()?; } - if let Some(vu) = &self.vu_common.vu { - if let Err(e) = vu.lock().unwrap().reset_vhost_user() { - error!("Failed to reset vhost-user daemon: {:?}", e); - return None; - } + if let Some(vu) = &self.vu_common.vu + && let Err(e) = vu.lock().unwrap().reset_vhost_user() + { + error!("Failed to reset vhost-user daemon: {:?}", e); + return None; } if let Some(kill_evt) = self.common.kill_evt.take() { diff --git a/virtio-devices/src/vhost_user/net.rs b/virtio-devices/src/vhost_user/net.rs index 99c0f81555..0f4561bca5 100644 --- a/virtio-devices/src/vhost_user/net.rs +++ b/virtio-devices/src/vhost_user/net.rs @@ -243,23 +243,24 @@ impl Net { impl Drop for Net { fn drop(&mut self) { - if let Some(kill_evt) = self.common.kill_evt.take() { - if let Err(e) = kill_evt.write(1) { - error!("failed to kill vhost-user-net: {:?}", e); - } + if let Some(kill_evt) = self.common.kill_evt.take() + && let Err(e) = kill_evt.write(1) + { + error!("failed to kill vhost-user-net: {:?}", e); } self.common.wait_for_epoll_threads(); - if let 
Some(thread) = self.epoll_thread.take() { - if let Err(e) = thread.join() { - error!("Error joining thread: {:?}", e); - } + if let Some(thread) = self.epoll_thread.take() + && let Err(e) = thread.join() + { + error!("Error joining thread: {:?}", e); } - if let Some(thread) = self.ctrl_queue_epoll_thread.take() { - if let Err(e) = thread.join() { - error!("Error joining thread: {:?}", e); - } + + if let Some(thread) = self.ctrl_queue_epoll_thread.take() + && let Err(e) = thread.join() + { + error!("Error joining thread: {:?}", e); } } } @@ -382,11 +383,11 @@ impl VirtioDevice for Net { self.common.resume().ok()?; } - if let Some(vu) = &self.vu_common.vu { - if let Err(e) = vu.lock().unwrap().reset_vhost_user() { - error!("Failed to reset vhost-user daemon: {:?}", e); - return None; - } + if let Some(vu) = &self.vu_common.vu + && let Err(e) = vu.lock().unwrap().reset_vhost_user() + { + error!("Failed to reset vhost-user daemon: {:?}", e); + return None; } if let Some(kill_evt) = self.common.kill_evt.take() { diff --git a/virtio-devices/src/vhost_user/vu_common_ctrl.rs b/virtio-devices/src/vhost_user/vu_common_ctrl.rs index 91fb55a207..05034d0ec5 100644 --- a/virtio-devices/src/vhost_user/vu_common_ctrl.rs +++ b/virtio-devices/src/vhost_user/vu_common_ctrl.rs @@ -317,17 +317,16 @@ impl VhostUserHandle { .get_features() .map_err(Error::VhostUserGetFeatures)?; - if acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() != 0 { - if let Some(acked_protocol_features) = + if acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() != 0 + && let Some(acked_protocol_features) = VhostUserProtocolFeatures::from_bits(acked_protocol_features) - { - self.vu - .set_protocol_features(acked_protocol_features) - .map_err(Error::VhostUserSetProtocolFeatures)?; + { + self.vu + .set_protocol_features(acked_protocol_features) + .map_err(Error::VhostUserSetProtocolFeatures)?; - if acked_protocol_features.contains(VhostUserProtocolFeatures::REPLY_ACK) { - self.vu.set_hdr_flags(VhostUserHeaderFlag::NEED_REPLY); - } + if acked_protocol_features.contains(VhostUserProtocolFeatures::REPLY_ACK) { + self.vu.set_hdr_flags(VhostUserHeaderFlag::NEED_REPLY); } } diff --git a/virtio-devices/src/vsock/unix/muxer.rs b/virtio-devices/src/vsock/unix/muxer.rs index 58df496e20..842e02677d 100644 --- a/virtio-devices/src/vsock/unix/muxer.rs +++ b/virtio-devices/src/vsock/unix/muxer.rs @@ -437,10 +437,10 @@ impl VsockMuxer { if let Some(EpollListener::LocalStream(stream)) = self.listener_map.get_mut(&fd) { let port = Self::read_local_stream_port(&mut self.partial_command_map, stream); - if let Err(Error::UnixRead(ref e)) = port { - if e.kind() == ErrorKind::WouldBlock { - return; - } + if let Err(Error::UnixRead(ref e)) = port + && e.kind() == ErrorKind::WouldBlock + { + return; } let stream = match self.remove_listener(fd) { diff --git a/virtio-devices/src/vsock/unix/muxer_killq.rs b/virtio-devices/src/vsock/unix/muxer_killq.rs index 5295ab69e1..b9cf47f4d1 100644 --- a/virtio-devices/src/vsock/unix/muxer_killq.rs +++ b/virtio-devices/src/vsock/unix/muxer_killq.rs @@ -111,11 +111,12 @@ impl MuxerKillQ { /// the queue has expired. Otherwise, `None` is returned. 
/// pub fn pop(&mut self) -> Option { - if let Some(item) = self.q.front() { - if Instant::now() > item.kill_time { - return Some(self.q.pop_front().unwrap().key); - } + if let Some(item) = self.q.front() + && Instant::now() > item.kill_time + { + return Some(self.q.pop_front().unwrap().key); } + None } diff --git a/vm-allocator/src/address.rs b/vm-allocator/src/address.rs index 9a72afdf98..c847dd473a 100644 --- a/vm-allocator/src/address.rs +++ b/vm-allocator/src/address.rs @@ -196,10 +196,10 @@ impl AddressAllocator { /// Free an already allocated address range. /// We can only free a range if it matches exactly an already allocated range. pub fn free(&mut self, address: GuestAddress, size: GuestUsize) { - if let Some(&range_size) = self.ranges.get(&address) { - if size == range_size { - self.ranges.remove(&address); - } + if let Some(&range_size) = self.ranges.get(&address) + && size == range_size + { + self.ranges.remove(&address); } } diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 1614dc0b51..366b369e76 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -1248,10 +1248,11 @@ impl DiskConfig { return Err(ValidationError::InvalidPciSegment(self.pci_segment)); } - if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { - if iommu_segments.contains(&self.pci_segment) && !self.iommu { - return Err(ValidationError::OnIommuSegment(self.pci_segment)); - } + if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() + && iommu_segments.contains(&self.pci_segment) + && !self.iommu + { + return Err(ValidationError::OnIommuSegment(self.pci_segment)); } } @@ -1260,13 +1261,13 @@ impl DiskConfig { } // Check Block device serial length - if let Some(ref serial) = self.serial { - if serial.len() > VIRTIO_BLK_ID_BYTES as usize { - return Err(ValidationError::InvalidSerialLength( - serial.len(), - VIRTIO_BLK_ID_BYTES as usize, - )); - } + if let Some(ref serial) = self.serial + && serial.len() > VIRTIO_BLK_ID_BYTES as usize + { + return Err(ValidationError::InvalidSerialLength( + serial.len(), + VIRTIO_BLK_ID_BYTES as usize, + )); } Ok(()) @@ -1496,17 +1497,18 @@ impl NetConfig { return Err(ValidationError::InvalidPciSegment(self.pci_segment)); } - if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { - if iommu_segments.contains(&self.pci_segment) && !self.iommu { - return Err(ValidationError::OnIommuSegment(self.pci_segment)); - } + if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() + && iommu_segments.contains(&self.pci_segment) + && !self.iommu + { + return Err(ValidationError::OnIommuSegment(self.pci_segment)); } } - if let Some(mtu) = self.mtu { - if mtu < virtio_devices::net::MIN_MTU { - return Err(ValidationError::InvalidMtu(mtu)); - } + if let Some(mtu) = self.mtu + && mtu < virtio_devices::net::MIN_MTU + { + return Err(ValidationError::InvalidMtu(mtu)); } if !self.offload_csum && (self.offload_tso || self.offload_ufo) { @@ -1633,12 +1635,12 @@ impl FsConfig { return Err(ValidationError::InvalidPciSegment(self.pci_segment)); } - if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { - if iommu_segments.contains(&self.pci_segment) { - return Err(ValidationError::IommuNotSupportedOnSegment( - self.pci_segment, - )); - } + if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() + && iommu_segments.contains(&self.pci_segment) + { + return Err(ValidationError::IommuNotSupportedOnSegment( + self.pci_segment, + )); } } @@ -1795,10 +1797,11 @@ impl PmemConfig { return 
Err(ValidationError::InvalidPciSegment(self.pci_segment)); } - if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { - if iommu_segments.contains(&self.pci_segment) && !self.iommu { - return Err(ValidationError::OnIommuSegment(self.pci_segment)); - } + if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() + && iommu_segments.contains(&self.pci_segment) + && !self.iommu + { + return Err(ValidationError::OnIommuSegment(self.pci_segment)); } } @@ -1895,17 +1898,18 @@ impl DebugConsoleConfig { return Err(Error::ParseConsoleInvalidModeGiven); } - if parser.is_set("iobase") { - if let Some(iobase_opt) = parser.get("iobase") { - if !iobase_opt.starts_with("0x") { - return Err(Error::Validation(ValidationError::InvalidIoPortHex( - iobase_opt, - ))); - } - iobase = Some(u16::from_str_radix(&iobase_opt[2..], 16).map_err(|_| { + if parser.is_set("iobase") + && let Some(iobase_opt) = parser.get("iobase") + { + if !iobase_opt.starts_with("0x") { + return Err(Error::Validation(ValidationError::InvalidIoPortHex( + iobase_opt, + ))); + } + iobase = + Some(u16::from_str_radix(&iobase_opt[2..], 16).map_err(|_| { Error::Validation(ValidationError::InvalidIoPortHex(iobase_opt)) })?); - } } Ok(Self { file, mode, iobase }) @@ -1957,10 +1961,11 @@ impl DeviceConfig { return Err(ValidationError::InvalidPciSegment(self.pci_segment)); } - if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { - if iommu_segments.contains(&self.pci_segment) && !self.iommu { - return Err(ValidationError::OnIommuSegment(self.pci_segment)); - } + if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() + && iommu_segments.contains(&self.pci_segment) + && !self.iommu + { + return Err(ValidationError::OnIommuSegment(self.pci_segment)); } } @@ -2000,12 +2005,12 @@ impl UserDeviceConfig { return Err(ValidationError::InvalidPciSegment(self.pci_segment)); } - if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { - if iommu_segments.contains(&self.pci_segment) { - return Err(ValidationError::IommuNotSupportedOnSegment( - self.pci_segment, - )); - } + if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() + && iommu_segments.contains(&self.pci_segment) + { + return Err(ValidationError::IommuNotSupportedOnSegment( + self.pci_segment, + )); } } @@ -2062,10 +2067,11 @@ impl VdpaConfig { return Err(ValidationError::InvalidPciSegment(self.pci_segment)); } - if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { - if iommu_segments.contains(&self.pci_segment) && !self.iommu { - return Err(ValidationError::OnIommuSegment(self.pci_segment)); - } + if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() + && iommu_segments.contains(&self.pci_segment) + && !self.iommu + { + return Err(ValidationError::OnIommuSegment(self.pci_segment)); } } @@ -2121,10 +2127,11 @@ impl VsockConfig { return Err(ValidationError::InvalidPciSegment(self.pci_segment)); } - if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { - if iommu_segments.contains(&self.pci_segment) && !self.iommu { - return Err(ValidationError::OnIommuSegment(self.pci_segment)); - } + if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() + && iommu_segments.contains(&self.pci_segment) + && !self.iommu + { + return Err(ValidationError::OnIommuSegment(self.pci_segment)); } } @@ -2493,10 +2500,10 @@ impl VmConfig { { let host_data_opt = &self.payload.as_ref().unwrap().host_data; - if let Some(host_data) = host_data_opt { - if host_data.len() != 
64 { - return Err(ValidationError::InvalidHostData); - } + if let Some(host_data) = host_data_opt + && host_data.len() != 64 + { + return Err(ValidationError::InvalidHostData); } } // The 'conflict' check is introduced in commit 24438e0390d3 @@ -2675,10 +2682,10 @@ impl VmConfig { } } - if let Some(vsock) = &self.vsock { - if [!0, 0, 1, 2].contains(&vsock.cid) { - return Err(ValidationError::VsockSpecialCid(vsock.cid)); - } + if let Some(vsock) = &self.vsock + && [!0, 0, 1, 2].contains(&vsock.cid) + { + return Err(ValidationError::VsockSpecialCid(vsock.cid)); } if let Some(balloon) = &self.balloon { @@ -3080,11 +3087,11 @@ impl VmConfig { } // Remove if vsock device - if let Some(vsock) = self.vsock.as_ref() { - if vsock.id.as_ref().map(|id| id.as_ref()) == Some(id) { - self.vsock = None; - removed = true; - } + if let Some(vsock) = self.vsock.as_ref() + && vsock.id.as_ref().map(|id| id.as_ref()) == Some(id) + { + self.vsock = None; + removed = true; } removed diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index 5ae2f6da26..f36467735b 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -601,10 +601,10 @@ impl BusDevice for CpuManager { state.removing = false; } // Trigger removal of vCPU - if data[0] & (1 << CPU_EJECT_FLAG) == 1 << CPU_EJECT_FLAG { - if let Err(e) = self.remove_vcpu(self.selected_cpu as u32) { - error!("Error removing vCPU: {:?}", e); - } + if data[0] & (1 << CPU_EJECT_FLAG) == 1 << CPU_EJECT_FLAG + && let Err(e) = self.remove_vcpu(self.selected_cpu as u32) + { + error!("Error removing vCPU: {:?}", e); } } else { warn!("Out of range vCPU id: {}", self.selected_cpu); @@ -1059,14 +1059,13 @@ impl CpuManager { } // Apply seccomp filter for vcpu thread. - if !vcpu_seccomp_filter.is_empty() { - if let Err(e) = + if !vcpu_seccomp_filter.is_empty() && let Err(e) = apply_filter(&vcpu_seccomp_filter).map_err(Error::ApplySeccompFilter) { error!("Error applying seccomp filter: {:?}", e); return; } - } + extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {} // This uses an async signal safe handler to kill the vcpu handles. register_signal_handler(SIGRTMIN(), handle_signal) diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 3bc814fd77..40a3d27cee 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -772,12 +772,13 @@ impl DeviceRelocation for AddressManager { if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) { let mut resource_updated = false; for resource in node.resources.iter_mut() { - if let Resource::PciBar { base, type_, .. } = resource { - if PciBarRegionType::from(*type_) == region_type && *base == old_base { - *base = new_base; - resource_updated = true; - break; - } + if let Resource::PciBar { base, type_, .. 
} = resource + && PciBarRegionType::from(*type_) == region_type + && *base == old_base + { + *base = new_base; + resource_updated = true; + break; } } @@ -814,43 +815,41 @@ impl DeviceRelocation for AddressManager { } else { let virtio_dev = virtio_pci_dev.virtio_device(); let mut virtio_dev = virtio_dev.lock().unwrap(); - if let Some(mut shm_regions) = virtio_dev.get_shm_regions() { - if shm_regions.addr.raw_value() == old_base { - let mem_region = self.vm.make_user_memory_region( - shm_regions.mem_slot, - old_base, - shm_regions.len, - shm_regions.host_addr, - false, - false, - ); - - self.vm.remove_user_memory_region(mem_region).map_err(|e| { - io::Error::other(format!("failed to remove user memory region: {e:?}")) - })?; + if let Some(mut shm_regions) = virtio_dev.get_shm_regions() + && shm_regions.addr.raw_value() == old_base + { + let mem_region = self.vm.make_user_memory_region( + shm_regions.mem_slot, + old_base, + shm_regions.len, + shm_regions.host_addr, + false, + false, + ); - // Create new mapping by inserting new region to KVM. - let mem_region = self.vm.make_user_memory_region( - shm_regions.mem_slot, - new_base, - shm_regions.len, - shm_regions.host_addr, - false, - false, - ); - - self.vm.create_user_memory_region(mem_region).map_err(|e| { - io::Error::other(format!("failed to create user memory regions: {e:?}")) - })?; + self.vm.remove_user_memory_region(mem_region).map_err(|e| { + io::Error::other(format!("failed to remove user memory region: {e:?}")) + })?; - // Update shared memory regions to reflect the new mapping. - shm_regions.addr = GuestAddress(new_base); - virtio_dev.set_shm_regions(shm_regions).map_err(|e| { - io::Error::other(format!( - "failed to update shared memory regions: {e:?}" - )) - })?; - } + // Create new mapping by inserting new region to KVM. + let mem_region = self.vm.make_user_memory_region( + shm_regions.mem_slot, + new_base, + shm_regions.len, + shm_regions.host_addr, + false, + false, + ); + + self.vm.create_user_memory_region(mem_region).map_err(|e| { + io::Error::other(format!("failed to create user memory regions: {e:?}")) + })?; + + // Update shared memory regions to reflect the new mapping. 
+ shm_regions.addr = GuestAddress(new_base); + virtio_dev.set_shm_regions(shm_regions).map_err(|e| { + io::Error::other(format!("failed to update shared memory regions: {e:?}")) + })?; } } } @@ -1655,14 +1654,14 @@ impl DeviceManager { iommu_attached_devices.append(&mut vfio_user_iommu_device_ids); // Add all devices from forced iommu segments - if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() { - if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { - for segment in iommu_segments { - for device in 0..32 { - let bdf = PciBdf::new(*segment, 0, device, 0); - if !iommu_attached_devices.contains(&bdf) { - iommu_attached_devices.push(bdf); - } + if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() + && let Some(iommu_segments) = platform_config.iommu_segments.as_ref() + { + for segment in iommu_segments { + for device in 0..32 { + let bdf = PciBdf::new(*segment, 0, device, 0); + if !iommu_attached_devices.contains(&bdf) { + iommu_attached_devices.push(bdf); } } } @@ -4350,14 +4349,14 @@ impl DeviceManager { .add_memory_region(new_region) .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?; - if let Some(dma_handler) = &handle.dma_handler { - if !handle.iommu { - let gpa = new_region.start_addr().0; - let size = new_region.len(); - dma_handler - .map(gpa, gpa, size) - .map_err(DeviceManagerError::VirtioDmaMap)?; - } + if let Some(dma_handler) = &handle.dma_handler + && !handle.iommu + { + let gpa = new_region.start_addr().0; + let size = new_region.len(); + dma_handler + .map(gpa, gpa, size) + .map_err(DeviceManagerError::VirtioDmaMap)?; } } @@ -4576,10 +4575,10 @@ impl DeviceManager { }; let mut iommu_attached = false; - if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { - if iommu_attached_devices.contains(&pci_device_bdf) { - iommu_attached = true; - } + if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices + && iommu_attached_devices.contains(&pci_device_bdf) + { + iommu_attached = true; } let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { @@ -4610,16 +4609,16 @@ impl DeviceManager { .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; } - if let Some(dma_handler) = dev.dma_handler() { - if !iommu_attached { - for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { - for region in zone.regions() { - let iova = region.start_addr().0; - let size = region.len(); - dma_handler - .unmap(iova, size) - .map_err(DeviceManagerError::VirtioDmaUnmap)?; - } + if let Some(dma_handler) = dev.dma_handler() + && !iommu_attached + { + for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { + for region in zone.regions() { + let iova = region.start_addr().0; + let size = region.len(); + dma_handler + .unmap(iova, size) + .map_err(DeviceManagerError::VirtioDmaUnmap)?; } } } diff --git a/vmm/src/igvm/igvm_loader.rs b/vmm/src/igvm/igvm_loader.rs index 97297a0920..03dd4d2472 100644 --- a/vmm/src/igvm/igvm_loader.rs +++ b/vmm/src/igvm/igvm_loader.rs @@ -428,11 +428,11 @@ pub fn load_igvm( let gpas_grouped = gpas .iter() .fold(Vec::>::new(), |mut acc, gpa| { - if let Some(last_vec) = acc.last_mut() { - if last_vec[0].page_type == gpa.page_type { - last_vec.push(*gpa); - return acc; - } + if let Some(last_vec) = acc.last_mut() + && last_vec[0].page_type == gpa.page_type + { + last_vec.push(*gpa); + return acc; } acc.push(vec![*gpa]); acc diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 
ef23793088..76b63d10be 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -729,15 +729,14 @@ impl Vmm { thread::Builder::new() .name("vmm_signal_handler".to_string()) .spawn(move || { - if !signal_handler_seccomp_filter.is_empty() { - if let Err(e) = apply_filter(&signal_handler_seccomp_filter) + if !signal_handler_seccomp_filter.is_empty() && let Err(e) = apply_filter(&signal_handler_seccomp_filter) .map_err(Error::ApplySeccompFilter) { error!("Error applying seccomp filter: {:?}", e); exit_evt.write(1).ok(); return; } - } + if landlock_enable{ match Landlock::new() { Ok(landlock) => { @@ -1834,10 +1833,10 @@ impl RequestHandler for Vmm { if let Some(desired_ram) = desired_ram { config.memory.size = desired_ram; } - if let Some(desired_balloon) = desired_balloon { - if let Some(balloon_config) = &mut config.balloon { - balloon_config.size = desired_balloon; - } + if let Some(desired_balloon) = desired_balloon + && let Some(balloon_config) = &mut config.balloon + { + balloon_config.size = desired_balloon; } Ok(()) } @@ -2306,16 +2305,16 @@ impl RequestHandler for Vmm { error!("Migration failed: {:?}", migration_err); // Stop logging dirty pages only for non-local migrations - if !send_data_migration.local { - if let Err(e) = vm.stop_dirty_log() { - return e; - } + if !send_data_migration.local + && let Err(e) = vm.stop_dirty_log() + { + return e; } - if vm.get_state().unwrap() == VmState::Paused { - if let Err(e) = vm.resume() { - return e; - } + if vm.get_state().unwrap() == VmState::Paused + && let Err(e) = vm.resume() + { + return e; } migration_err diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index 15225e9dc7..74765cf514 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -1959,23 +1959,21 @@ impl MemoryManager { } for region in memory_zone.regions() { - if snapshot { - if let Some(file_offset) = region.file_offset() { - if (region.flags() & libc::MAP_SHARED == libc::MAP_SHARED) - && Self::is_hardlink(file_offset.file()) - { - // In this very specific case, we know the memory - // region is backed by a file on the host filesystem - // that can be accessed by the user, and additionally - // the mapping is shared, which means that modifications - // to the content are written to the actual file. - // When meeting these conditions, we can skip the - // copy of the memory content for this specific region, - // as we can assume the user will have it saved through - // the backing file already. - continue; - } - } + if snapshot + && let Some(file_offset) = region.file_offset() + && (region.flags() & libc::MAP_SHARED == libc::MAP_SHARED) + && Self::is_hardlink(file_offset.file()) + { + // In this very specific case, we know the memory + // region is backed by a file on the host filesystem + // that can be accessed by the user, and additionally + // the mapping is shared, which means that modifications + // to the content are written to the actual file. + // When meeting these conditions, we can skip the + // copy of the memory content for this specific region, + // as we can assume the user will have it saved through + // the backing file already. 
+ continue; } table.push(MemoryRange { diff --git a/vmm/src/serial_manager.rs b/vmm/src/serial_manager.rs index f05100b464..9c4255defa 100644 --- a/vmm/src/serial_manager.rs +++ b/vmm/src/serial_manager.rs @@ -432,12 +432,12 @@ impl Drop for SerialManager { if let Some(handle) = self.handle.take() { handle.join().ok(); } - if let ConsoleOutput::Socket(_) = self.in_file { - if let Some(socket_path) = self.socket_path.as_ref() { - std::fs::remove_file(socket_path.as_os_str()) - .map_err(Error::RemoveUnixSocket) - .ok(); - } + if let ConsoleOutput::Socket(_) = self.in_file + && let Some(socket_path) = self.socket_path.as_ref() + { + std::fs::remove_file(socket_path.as_os_str()) + .map_err(Error::RemoveUnixSocket) + .ok(); } } } diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 0e85b1ddc1..af843f169e 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -3123,12 +3123,12 @@ impl GuestDebuggable for Vm { #[cfg(feature = "tdx")] { - if let Some(ref platform) = self.config.lock().unwrap().platform { - if platform.tdx { - return Err(GuestDebuggableError::Coredump(anyhow!( - "Coredump not possible with TDX VM" - ))); - } + if let Some(ref platform) = self.config.lock().unwrap().platform + && platform.tdx + { + return Err(GuestDebuggableError::Coredump(anyhow!( + "Coredump not possible with TDX VM" + ))); } } From 1179a1a1c92ee1aaddf4b339d873fea3000a37bd Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 26 Aug 2025 08:48:37 +0200 Subject: [PATCH 173/294] vmm: refactor alignment Context [0]. [0] https://github.com/cloud-hypervisor/cloud-hypervisor/pull/7256#discussion_r2298538384 Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- vmm/src/vm.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index af843f169e..836feb4707 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -1177,8 +1177,8 @@ impl Vm { let guest_memory = memory_manager.lock().as_ref().unwrap().guest_memory(); let mem = guest_memory.memory(); let alignment = 0x20_0000; - let aligned_kernel_addr = - (arch::layout::KERNEL_START.0 + (alignment - 1)) & !(alignment - 1); + // round up + let aligned_kernel_addr = arch::layout::KERNEL_START.0.div_ceil(alignment) * alignment; let entry_addr = { match linux_loader::loader::pe::PE::load( mem.deref(), From 4dd635a56a19ff8b5f44a4c0452fb1d84a273228 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 Sep 2025 23:01:57 +0000 Subject: [PATCH 174/294] build: Bump bitflags from 2.9.3 to 2.9.4 Bumps [bitflags](https://github.com/bitflags/bitflags) from 2.9.3 to 2.9.4. - [Release notes](https://github.com/bitflags/bitflags/releases) - [Changelog](https://github.com/bitflags/bitflags/blob/main/CHANGELOG.md) - [Commits](https://github.com/bitflags/bitflags/compare/2.9.3...2.9.4) --- updated-dependencies: - dependency-name: bitflags dependency-version: 2.9.4 dependency-type: direct:production update-type: version-update:semver-patch ... 
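A quick aside on the alignment refactor in PATCH 173 above: for a power-of-two alignment, the old mask-based expression `(addr + (alignment - 1)) & !(alignment - 1)` and the new `addr.div_ceil(alignment) * alignment` form round up to the same value (the `div_ceil` form additionally works for non-power-of-two alignments). Below is a minimal, self-contained sketch checking that equivalence; the function names are illustrative and not taken from the Cloud Hypervisor sources, and it assumes the addition does not overflow.

```rust
// Round up using the bit-mask idiom; valid only when `alignment` is a power of two.
fn align_up_mask(addr: u64, alignment: u64) -> u64 {
    (addr + (alignment - 1)) & !(alignment - 1)
}

// Round up using integer ceiling division; valid for any non-zero `alignment`.
fn align_up_div_ceil(addr: u64, alignment: u64) -> u64 {
    addr.div_ceil(alignment) * alignment
}

fn main() {
    let alignment = 0x20_0000; // 2 MiB, the alignment used in the patch
    for addr in [0u64, 0x1, 0x1f_ffff, 0x20_0000, 0x20_0001, 0x7fee_1234] {
        assert_eq!(
            align_up_mask(addr, alignment),
            align_up_div_ceil(addr, alignment)
        );
    }
    println!("mask form and div_ceil form agree");
}
```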
Signed-off-by: dependabot[bot] --- Cargo.lock | 30 +++++++++++++++--------------- Cargo.toml | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0ee00fa852..1152eea5f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -298,9 +298,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.3" +version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" [[package]] name = "block" @@ -526,7 +526,7 @@ dependencies = [ "anyhow", "arch", "bitfield-struct", - "bitflags 2.9.3", + "bitflags 2.9.4", "byteorder", "event_monitor", "hypervisor", @@ -639,7 +639,7 @@ version = "4.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74351c3392ea1ff6cd2628e0042d268ac2371cb613252ff383b6dfa50d22fa79" dependencies = [ - "bitflags 2.9.3", + "bitflags 2.9.4", "libc", ] @@ -828,7 +828,7 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d66e32caf5dd59f561be0143e413e01d651bd8498eb9aa0be8c482c81c8d31" dependencies = [ - "bitflags 2.9.3", + "bitflags 2.9.4", "cfg-if", "log", "managed", @@ -1000,7 +1000,7 @@ version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" dependencies = [ - "bitflags 2.9.3", + "bitflags 2.9.4", "cfg-if", "libc", ] @@ -1077,7 +1077,7 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b702df98508cb63ad89dd9beb9f6409761b30edca10d48e57941d3f11513a006" dependencies = [ - "bitflags 2.9.3", + "bitflags 2.9.4", "kvm-bindings", "libc", "vmm-sys-util", @@ -1112,7 +1112,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.9.3", + "bitflags 2.9.4", "libc", ] @@ -1293,7 +1293,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags 2.9.3", + "bitflags 2.9.4", "cfg-if", "cfg_aliases", "libc", @@ -1834,7 +1834,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.9.3", + "bitflags 2.9.4", "errno", "libc", "linux-raw-sys 0.4.15", @@ -1847,7 +1847,7 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags 2.9.3", + "bitflags 2.9.4", "errno", "libc", "linux-raw-sys 0.9.4", @@ -2260,7 +2260,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8db5bc783aad75202ad4cbcdc5e893cff1dd8fa24a1bcdb4de8998d3c4d169a" dependencies = [ - "bitflags 2.9.3", + "bitflags 2.9.4", "libc", "log", "serde", @@ -2278,7 +2278,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a4dcad85a129d97d5d4b2f3c47a4affdeedd76bdcd02094bcb5d9b76cac2d05" dependencies = [ - "bitflags 2.9.3", + "bitflags 2.9.4", "libc", "uuid", "vm-memory", @@ -2451,7 +2451,7 @@ dependencies = [ 
"acpi_tables", "anyhow", "arch", - "bitflags 2.9.3", + "bitflags 2.9.4", "block", "blocking", "cfg-if", @@ -2845,7 +2845,7 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.9.3", + "bitflags 2.9.4", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 9f78b3f8c6..3806e8ef19 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -137,7 +137,7 @@ serde_with = { version = "3.14.0", default-features = false } # other crates anyhow = "1.0.98" -bitflags = "2.9.3" +bitflags = "2.9.4" byteorder = "1.5.0" cfg-if = "1.0.0" clap = "4.5.47" From ad3f94638fd20ca480fe43b871bfa27f3f1e65d9 Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Thu, 4 Sep 2025 21:41:06 +0000 Subject: [PATCH 175/294] docs: Clarify our policy on AI generated code Fix: #7162 Signed-off-by: Bo Chen --- CONTRIBUTING.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cdd75e31a1..c77d3e36da 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -114,3 +114,13 @@ Signed-off-by: Sebastien Boeuf Then, after the corresponding PR is merged, GitHub will automatically close that issue when parsing the [commit message](https://help.github.com/articles/closing-issues-via-commit-messages/). + +## AI Generated Code + +Our policy is to decline any contributions known to contain contents +generated or derived from using Large Language Models (LLMs). This +includes ChatGPT, Gemini, Claude, Copilot and similar tools. + +The goal is to avoid ambiguity in license compliance and optimize the +use of limited project resources, especially for code review and +maintenance. This policy can be revisited as LLMs evolve and mature. From 43a6a914b89b39d8657fd2c14dda56213892e00f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 23:01:59 +0000 Subject: [PATCH 176/294] build: Bump ssh2 from 0.9.4 to 0.9.5 Bumps [ssh2](https://github.com/alexcrichton/ssh2-rs) from 0.9.4 to 0.9.5. - [Commits](https://github.com/alexcrichton/ssh2-rs/compare/0.9.4...0.9.5) --- updated-dependencies: - dependency-name: ssh2 dependency-version: 0.9.5 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 57 ++++++------------------------------------- test_infra/Cargo.toml | 2 +- 2 files changed, 8 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1152eea5f9..37ff098e1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -555,7 +555,7 @@ dependencies = [ "backtrace", "lazy_static", "mintex", - "parking_lot 0.12.1", + "parking_lot", "rustc-hash", "serde", "serde_json", @@ -985,15 +985,6 @@ dependencies = [ "hashbrown", ] -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", -] - [[package]] name = "io-uring" version = "0.7.10" @@ -1422,17 +1413,6 @@ version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - [[package]] name = "parking_lot" version = "0.12.1" @@ -1440,21 +1420,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core 0.9.9", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", + "parking_lot_core", ] [[package]] @@ -1465,7 +1431,7 @@ checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.4.1", + "redox_syscall", "smallvec", "windows-targets 0.48.5", ] @@ -1747,15 +1713,6 @@ dependencies = [ "vmm-sys-util", ] -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.4.1" @@ -1999,14 +1956,14 @@ dependencies = [ [[package]] name = "ssh2" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7fe461910559f6d5604c3731d00d2aafc4a83d1665922e280f42f9a168d5455" +checksum = "2f84d13b3b8a0d4e91a2629911e951db1bb8671512f5c09d7d4ba34500ba68c8" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.9.4", "libc", "libssh2-sys", - "parking_lot 0.11.2", + "parking_lot", ] [[package]] diff --git a/test_infra/Cargo.toml b/test_infra/Cargo.toml index fe3cba8eea..8fdefed15a 100644 --- a/test_infra/Cargo.toml +++ b/test_infra/Cargo.toml @@ -9,7 +9,7 @@ dirs = { workspace = true } epoll = { workspace = true } libc = { workspace = true } serde_json = { workspace = true } -ssh2 = { version = "0.9.4", features = ["vendored-openssl"] } +ssh2 = { version = "0.9.5", features = ["vendored-openssl"] } thiserror = { workspace = true } vmm-sys-util = { workspace = true } wait-timeout = { workspace = true } From 5f399fc06d5e3200f3edb84a1030d9fd7323dc36 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Thu, 11 Sep 2025 22:46:49 +0000 Subject: [PATCH 
177/294] vmm: fix hotplugging when max_cpus > 255 Signed-off-by: Peter Oskolkov --- vmm/src/cpu.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index f36467735b..43edf5015b 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -524,7 +524,7 @@ pub struct CpuManager { #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, vcpu_states: Vec, - selected_cpu: u8, + selected_cpu: u32, vcpus: Vec>>, seccomp_action: SeccompAction, vm_ops: Arc, @@ -553,11 +553,13 @@ impl BusDevice for CpuManager { match offset { CPU_SELECTION_OFFSET => { - data[0] = self.selected_cpu; + assert!(data.len() >= core::mem::size_of::()); + data[0..core::mem::size_of::()] + .copy_from_slice(&self.selected_cpu.to_le_bytes()); } CPU_STATUS_OFFSET => { - if (self.selected_cpu as u32) < self.max_vcpus() { - let state = &self.vcpu_states[usize::from(self.selected_cpu)]; + if self.selected_cpu < self.max_vcpus() { + let state = &self.vcpu_states[usize::try_from(self.selected_cpu).unwrap()]; if state.active() { data[0] |= 1 << CPU_ENABLE_FLAG; } @@ -583,11 +585,13 @@ impl BusDevice for CpuManager { fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option> { match offset { CPU_SELECTION_OFFSET => { - self.selected_cpu = data[0]; + assert!(data.len() >= core::mem::size_of::()); + self.selected_cpu = + u32::from_le_bytes(data[0..core::mem::size_of::()].try_into().unwrap()); } CPU_STATUS_OFFSET => { - if (self.selected_cpu as u32) < self.max_vcpus() { - let state = &mut self.vcpu_states[usize::from(self.selected_cpu)]; + if self.selected_cpu < self.max_vcpus() { + let state = &mut self.vcpu_states[usize::try_from(self.selected_cpu).unwrap()]; // The ACPI code writes back a 1 to acknowledge the insertion if (data[0] & (1 << CPU_INSERTING_FLAG) == 1 << CPU_INSERTING_FLAG) && state.inserting @@ -602,7 +606,7 @@ impl BusDevice for CpuManager { } // Trigger removal of vCPU if data[0] & (1 << CPU_EJECT_FLAG) == 1 << CPU_EJECT_FLAG - && let Err(e) = self.remove_vcpu(self.selected_cpu as u32) + && let Err(e) = self.remove_vcpu(self.selected_cpu) { error!("Error removing vCPU: {:?}", e); } From 95e3c8f8fd71170807cb3a653cdfd61b0e25914c Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Thu, 11 Sep 2025 21:18:10 +0000 Subject: [PATCH 178/294] build: Release v48.0 Signed-off-by: Bo Chen --- Cargo.lock | 2 +- Cargo.toml | 2 +- release-notes.md | 249 ++++++++++++++++++++++++++++++++--------------- 3 files changed, 171 insertions(+), 82 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 37ff098e1b..d2f13bfd58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -399,7 +399,7 @@ checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "cloud-hypervisor" -version = "47.0.0" +version = "48.0.0" dependencies = [ "anyhow", "api_client", diff --git a/Cargo.toml b/Cargo.toml index 3806e8ef19..f154330808 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ edition = "2024" homepage = "https://github.com/cloud-hypervisor/cloud-hypervisor" license = "Apache-2.0 AND BSD-3-Clause" name = "cloud-hypervisor" -version = "47.0.0" +version = "48.0.0" # Minimum buildable version: # Keep in sync with version in .github/workflows/build.yaml # Policy on MSRV (see #4318): diff --git a/release-notes.md b/release-notes.md index 0eb024c966..4dc982e838 100644 --- a/release-notes.md +++ b/release-notes.md @@ -1,57 +1,69 @@ +- [v48.0](#v480) + - [Experimental `fw_cfg` Device Support](#experimental-fw_cfg-device-support) + - [Experimental `ivshmem` Device 
Support](#experimental-ivshmem-device-support) + - [Firmware Boot Support on `riscv64`](#firmware-boot-support-on-riscv64) + - [Increased vCPU Limit on x86_64/kvm](#increased-vcpu-limit-on-x86_64kvm) + - [Improved Block Performance with Small Block Sizes](#improved-block-performance-with-small-block-sizes) + - [Faster VM Pause Operation](#faster-vm-pause-operation) + - [Updated Documentation on Windows Guest Support](#updated-documentation-on-windows-guest-support) + - [Policy on AI Generated Code](#policy-on-ai-generated-code) + - [Removed SGX Support](#removed-sgx-support) + - [Notable Bug Fixes](#notable-bug-fixes) + - [Contributors](#contributors) - [v47.0](#v470) - [Block Device Error Reporting to the Guest](#block-device-error-reporting-to-the-guest) - [Nice Error Messages on Exit](#nice-error-messages-on-exit) - [Alphabetically Sorted CLI Options for ch-remote](#alphabetically-sorted-cli-options-for-ch-remote) - - [Notable Bug Fixes](#notable-bug-fixes) + - [Notable Bug Fixes](#notable-bug-fixes-1) - [Deprecations](#deprecations) - - [Contributors](#contributors) + - [Contributors](#contributors-1) - [v46.0](#v460) - [File-level Locking Support with `--disk`](#file-level-locking-support-with---disk) - [Improved Error Reporting with VM Resizing](#improved-error-reporting-with-vm-resizing) - [IPv6 Address Support with `--net`](#ipv6-address-support-with---net) - [Experimental AArch64 Support with the MSHV Hypervisor](#experimental-aarch64-support-with-the-mshv-hypervisor) - [Deprecated SGX Support](#deprecated-sgx-support) - - [Notable Bug Fixes](#notable-bug-fixes-1) - - [Contributors](#contributors-1) + - [Notable Bug Fixes](#notable-bug-fixes-2) + - [Contributors](#contributors-2) - [v45.0](#v450) - [Experimental `riscv64` Architecture Support](#experimental-riscv64-architecture-support) - [Alphabetically Sorted CLI Options](#alphabetically-sorted-cli-options) - [Improved Downtime of VM Live Migration](#improved-downtime-of-vm-live-migration) - - [Notable Bug Fixes](#notable-bug-fixes-2) - - [Contributors](#contributors-2) + - [Notable Bug Fixes](#notable-bug-fixes-3) + - [Contributors](#contributors-3) - [v44.0](#v440) - [Configurable `virtio-iommu` Address Width](#configurable-virtio-iommu-address-width) - [Notable Performance Improvements](#notable-performance-improvements) - [New Fuzzers](#new-fuzzers) - - [Notable Bug Fixes](#notable-bug-fixes-3) - - [Contributors](#contributors-3) + - [Notable Bug Fixes](#notable-bug-fixes-4) + - [Contributors](#contributors-4) - [v43.0](#v430) - [Live Migration over TCP Connections](#live-migration-over-tcp-connections) - [Notable Performance Improvements](#notable-performance-improvements-1) - - [Notable Bug Fixes](#notable-bug-fixes-4) - - [Contributors](#contributors-4) + - [Notable Bug Fixes](#notable-bug-fixes-5) + - [Contributors](#contributors-5) - [v42.0](#v420) - [SVE/SVE2 Support on AArch64](#svesve2-support-on-aarch64) - - [Notable Bug Fixes](#notable-bug-fixes-5) + - [Notable Bug Fixes](#notable-bug-fixes-6) - [Sponsorships](#sponsorships) - - [Contributors](#contributors-5) + - [Contributors](#contributors-6) - [v41.0](#v410) - [Experimental "Pvmemcontrol" Support](#experimental-pvmemcontrol-support) - [Sandboxing With Landlock Support](#sandboxing-with-landlock-support) - [Notable Performance Improvements](#notable-performance-improvements-2) - - [Notable Bug Fixes](#notable-bug-fixes-6) - - [Contributors](#contributors-6) -- [v40.0](#v400) - - [Support for Restoring File Descriptor Backed Network 
Devices](#support-for-restoring-file-descriptor-backed-network-devices) - [Notable Bug Fixes](#notable-bug-fixes-7) - [Contributors](#contributors-7) +- [v40.0](#v400) + - [Support for Restoring File Descriptor Backed Network Devices](#support-for-restoring-file-descriptor-backed-network-devices) + - [Notable Bug Fixes](#notable-bug-fixes-8) + - [Contributors](#contributors-8) - [v39.0](#v390) - [Variable Sizing of PCI Apertures for Segments](#variable-sizing-of-pci-apertures-for-segments) - [Direct Booting with bzImages](#direct-booting-with-bzimages) - [Support for NVIDIA GPUDirect P2P Support](#support-for-nvidia-gpudirect-p2p-support) - [Guest NMI Injection Support](#guest-nmi-injection-support) - - [Notable Bug Fixes](#notable-bug-fixes-8) - - [Contributors](#contributors-8) + - [Notable Bug Fixes](#notable-bug-fixes-9) + - [Contributors](#contributors-9) - [v38.0](#v380) - [Group Rate Limiter on Block Devices](#group-rate-limiter-on-block-devices) - [CPU Pinning Support for Block Device Worker Thread](#cpu-pinning-support-for-block-device-worker-thread) @@ -59,16 +71,16 @@ - [New 'debug-console' Device](#new-debug-console-device) - [Improved VFIO Device Support](#improved-vfio-device-support) - [Extended CPU Affinity Support](#extended-cpu-affinity-support) - - [Notable Bug Fixes](#notable-bug-fixes-9) - - [Contributors](#contributors-9) + - [Notable Bug Fixes](#notable-bug-fixes-10) + - [Contributors](#contributors-10) - [v37.0](#v370) - [Long Term Support (LTS) Release](#long-term-support-lts-release) - [Multiple PCI segments Support for 32-bit VFIO devices](#multiple-pci-segments-support-for-32-bit-vfio-devices) - [Configurable Named TAP Devices](#configurable-named-tap-devices) - [TTY Output from Both Serial Device and Virtio Console](#tty-output-from-both-serial-device-and-virtio-console) - [Faster VM Restoration from Snapshots](#faster-vm-restoration-from-snapshots) - - [Notable Bug Fixes](#notable-bug-fixes-10) - - [Contributors](#contributors-10) + - [Notable Bug Fixes](#notable-bug-fixes-11) + - [Contributors](#contributors-11) - [v36.0](#v360) - [Command Line Changes](#command-line-changes) - [Enabled Features Reported via API Endpoint and CLI](#enabled-features-reported-via-api-endpoint-and-cli) @@ -77,31 +89,31 @@ - [Unix Socket Backend for Serial Port](#unix-socket-backend-for-serial-port) - [AIO Backend for Block Devices](#aio-backend-for-block-devices) - [Documentation Improvements](#documentation-improvements) - - [Notable Bug Fixes](#notable-bug-fixes-11) - - [Contributors](#contributors-11) + - [Notable Bug Fixes](#notable-bug-fixes-12) + - [Contributors](#contributors-12) - [v35.0](#v350) - [`virtio-vsock` Support for Linux Guest Kernel v6.3+](#virtio-vsock-support-for-linux-guest-kernel-v63) - [User Specified Serial Number for `virtio-block`](#user-specified-serial-number-for-virtio-block) - [vCPU TSC Frequency Included in Migration State](#vcpu-tsc-frequency-included-in-migration-state) - - [Notable Bug Fixes](#notable-bug-fixes-12) - - [Contributors](#contributors-12) + - [Notable Bug Fixes](#notable-bug-fixes-13) + - [Contributors](#contributors-13) - [v34.0](#v340) - [Paravirtualised Panic Device Support](#paravirtualised-panic-device-support) - [Improvements to VM Core Dump](#improvements-to-vm-core-dump) - [QCOW2 Support for Backing Files](#qcow2-support-for-backing-files) - [Minimum Host Kernel Bump](#minimum-host-kernel-bump) - - [Notable Bug Fixes](#notable-bug-fixes-13) - - [Contributors](#contributors-13) + - [Notable Bug Fixes](#notable-bug-fixes-14) + 
- [Contributors](#contributors-14) - [v33.0](#v330) - [D-Bus based API](#d-bus-based-api) - [Expose Host CPU Cache Details for AArch64](#expose-host-cpu-cache-details-for-aarch64) - - [Notable Bug Fixes](#notable-bug-fixes-14) - - [Contributors](#contributors-14) + - [Notable Bug Fixes](#notable-bug-fixes-15) + - [Contributors](#contributors-15) - [v32.0](#v320) - [Increased PCI Segment Limit](#increased-pci-segment-limit) - [API Changes](#api-changes) - - [Notable Bug Fixes](#notable-bug-fixes-15) - - [Contributors](#contributors-15) + - [Notable Bug Fixes](#notable-bug-fixes-16) + - [Contributors](#contributors-16) - [v31.1](#v311) - [v31.0](#v310) - [Update to Latest `acpi_tables`](#update-to-latest-acpi_tables) @@ -109,15 +121,15 @@ - [Improvements on Console `SIGWINCH` Handler](#improvements-on-console-sigwinch-handler) - [Remove Directory Support from `MemoryZoneConfig::file`](#remove-directory-support-from-memoryzoneconfigfile) - [Documentation Improvements](#documentation-improvements-1) - - [Notable Bug Fixes](#notable-bug-fixes-16) - - [Contributors](#contributors-16) + - [Notable Bug Fixes](#notable-bug-fixes-17) + - [Contributors](#contributors-17) - [v30.0](#v300) - [Command Line Changes for Reduced Binary Size](#command-line-changes-for-reduced-binary-size) - [Basic vfio-user Server Support](#basic-vfio-user-server-support) - [Heap Profiling Support](#heap-profiling-support) - [Documentation Improvements](#documentation-improvements-2) - - [Notable Bug Fixes](#notable-bug-fixes-17) - - [Contributors](#contributors-17) + - [Notable Bug Fixes](#notable-bug-fixes-18) + - [Contributors](#contributors-18) - [v28.2](#v282) - [v29.0](#v290) - [Release Binary Supports Both MSHV and KVM](#release-binary-supports-both-mshv-and-kvm) @@ -127,10 +139,10 @@ - [`AArch64` Documentation Integration](#aarch64-documentation-integration) - [`virtio-block` Counters Enhancement](#virtio-block-counters-enhancement) - [TCP Offload Control](#tcp-offload-control) - - [Notable Bug Fixes](#notable-bug-fixes-18) + - [Notable Bug Fixes](#notable-bug-fixes-19) - [Removals](#removals) - [Deprecations](#deprecations-1) - - [Contributors](#contributors-18) + - [Contributors](#contributors-19) - [v28.1](#v281) - [v28.0](#v280) - [Community Engagement (Reminder)](#community-engagement-reminder) @@ -138,9 +150,9 @@ - [Virtualised TPM Support](#virtualised-tpm-support) - [Transparent Huge Page Support](#transparent-huge-page-support) - [README Quick Start Improved](#readme-quick-start-improved) - - [Notable Bug Fixes](#notable-bug-fixes-19) + - [Notable Bug Fixes](#notable-bug-fixes-20) - [Removals](#removals-1) - - [Contributors](#contributors-19) + - [Contributors](#contributors-20) - [v27.0](#v270) - [Community Engagement](#community-engagement) - [Prebuilt Packages](#prebuilt-packages) @@ -149,41 +161,41 @@ - [Simplified Build Feature Flags](#simplified-build-feature-flags) - [Asynchronous Kernel Loading](#asynchronous-kernel-loading) - [GDB Support for AArch64](#gdb-support-for-aarch64) - - [Notable Bug Fixes](#notable-bug-fixes-20) + - [Notable Bug Fixes](#notable-bug-fixes-21) - [Deprecations](#deprecations-2) - - [Contributors](#contributors-20) + - [Contributors](#contributors-21) - [v26.0](#v260) - [SMBIOS Improvements via `--platform`](#smbios-improvements-via---platform) - [Unified Binary MSHV and KVM Support](#unified-binary-mshv-and-kvm-support) - - [Notable Bug Fixes](#notable-bug-fixes-21) + - [Notable Bug Fixes](#notable-bug-fixes-22) - [Deprecations](#deprecations-3) - [Removals](#removals-2) - 
- [Contributors](#contributors-21) + - [Contributors](#contributors-22) - [v25.0](#v250) - [`ch-remote` Improvements](#ch-remote-improvements-1) - [VM "Coredump" Support](#vm-coredump-support) - - [Notable Bug Fixes](#notable-bug-fixes-22) + - [Notable Bug Fixes](#notable-bug-fixes-23) - [Removals](#removals-3) - - [Contributors](#contributors-22) + - [Contributors](#contributors-23) - [v24.0](#v240) - [Bypass Mode for `virtio-iommu`](#bypass-mode-for-virtio-iommu) - [Ensure Identifiers Uniqueness](#ensure-identifiers-uniqueness) - [Sparse Mmap support](#sparse-mmap-support) - [Expose Platform Serial Number](#expose-platform-serial-number) - - [Notable Bug Fixes](#notable-bug-fixes-23) + - [Notable Bug Fixes](#notable-bug-fixes-24) - [Notable Improvements](#notable-improvements) - [Deprecations](#deprecations-4) - [New on the Website](#new-on-the-website) - - [Contributors](#contributors-23) + - [Contributors](#contributors-24) - [v23.1](#v231) - [v23.0](#v230) - [vDPA Support](#vdpa-support) - [Updated OS Support list](#updated-os-support-list) - [`AArch64` Memory Map Improvements](#aarch64-memory-map-improvements) - [`AMX` Support](#amx-support) - - [Notable Bug Fixes](#notable-bug-fixes-24) + - [Notable Bug Fixes](#notable-bug-fixes-25) - [Deprecations](#deprecations-5) - - [Contributors](#contributors-24) + - [Contributors](#contributors-25) - [v22.1](#v221) - [v22.0](#v220) - [GDB Debug Stub Support](#gdb-debug-stub-support) @@ -194,13 +206,13 @@ - [PMU Support for AArch64](#pmu-support-for-aarch64) - [Documentation Under CC-BY-4.0 License](#documentation-under-cc-by-40-license) - [Deprecation of "Classic" `virtiofsd`](#deprecation-of-classic-virtiofsd) - - [Notable Bug Fixes](#notable-bug-fixes-25) - - [Contributors](#contributors-25) + - [Notable Bug Fixes](#notable-bug-fixes-26) + - [Contributors](#contributors-26) - [v21.0](#v210) - [Efficient Local Live Migration (for Live Upgrade)](#efficient-local-live-migration-for-live-upgrade) - [Recommended Kernel is Now 5.15](#recommended-kernel-is-now-515) - - [Notable Bug fixes](#notable-bug-fixes-26) - - [Contributors](#contributors-26) + - [Notable Bug fixes](#notable-bug-fixes-27) + - [Contributors](#contributors-27) - [v20.2](#v202) - [v20.1](#v201) - [v20.0](#v200) @@ -209,8 +221,8 @@ - [Improved VFIO support](#improved-vfio-support) - [Safer code](#safer-code) - [Extended documentation](#extended-documentation) - - [Notable bug fixes](#notable-bug-fixes-27) - - [Contributors](#contributors-27) + - [Notable bug fixes](#notable-bug-fixes-28) + - [Contributors](#contributors-28) - [v19.0](#v190) - [Improved PTY handling for serial and `virtio-console`](#improved-pty-handling-for-serial-and-virtio-console) - [PCI boot time optimisations](#pci-boot-time-optimisations) @@ -218,8 +230,8 @@ - [Live migration enhancements](#live-migration-enhancements) - [`virtio-mem` support with `vfio-user`](#virtio-mem-support-with-vfio-user) - [AArch64 for `virtio-iommu`](#aarch64-for-virtio-iommu) - - [Notable bug fixes](#notable-bug-fixes-28) - - [Contributors](#contributors-28) + - [Notable bug fixes](#notable-bug-fixes-29) + - [Contributors](#contributors-29) - [v18.0](#v180) - [Experimental User Device (`vfio-user`) support](#experimental-user-device-vfio-user-support) - [Migration support for `vhost-user` devices](#migration-support-for-vhost-user-devices) @@ -229,23 +241,23 @@ - [Live migration on MSHV hypervisor](#live-migration-on-mshv-hypervisor) - [AArch64 CPU topology support](#aarch64-cpu-topology-support) - [Power button support on 
AArch64](#power-button-support-on-aarch64) - - [Notable bug fixes](#notable-bug-fixes-29) - - [Contributors](#contributors-29) + - [Notable bug fixes](#notable-bug-fixes-30) + - [Contributors](#contributors-30) - [v17.0](#v170) - [ARM64 NUMA support using ACPI](#arm64-numa-support-using-acpi) - [`Seccomp` support for MSHV backend](#seccomp-support-for-mshv-backend) - [Hotplug of `macvtap` devices](#hotplug-of-macvtap-devices) - [Improved SGX support](#improved-sgx-support) - [Inflight tracking for `vhost-user` devices](#inflight-tracking-for-vhost-user-devices) - - [Notable bug fixes](#notable-bug-fixes-30) - - [Contributors](#contributors-30) + - [Notable bug fixes](#notable-bug-fixes-31) + - [Contributors](#contributors-31) - [v16.0](#v160) - [Improved live migration support](#improved-live-migration-support) - [Improved `vhost-user` support](#improved-vhost-user-support) - [ARM64 ACPI and UEFI support](#arm64-acpi-and-uefi-support) - - [Notable bug fixes](#notable-bug-fixes-31) + - [Notable bug fixes](#notable-bug-fixes-32) - [Removed functionality](#removed-functionality) - - [Contributors](#contributors-31) + - [Contributors](#contributors-32) - [v15.0](#v150) - [Version numbering and stability guarantees](#version-numbering-and-stability-guarantees) - [Network device rate limiting](#network-device-rate-limiting) @@ -253,7 +265,7 @@ - [`--api-socket` supports file descriptor parameter](#--api-socket-supports-file-descriptor-parameter) - [Bug fixes](#bug-fixes) - [Deprecations](#deprecations-6) - - [Contributors](#contributors-32) + - [Contributors](#contributors-33) - [v0.14.1](#v0141) - [v0.14.0](#v0140) - [Structured event monitoring](#structured-event-monitoring) @@ -263,7 +275,7 @@ - [PTY control for serial and `virtio-console`](#pty-control-for-serial-and-virtio-console) - [Block device rate limiting](#block-device-rate-limiting) - [Deprecations](#deprecations-7) - - [Contributors](#contributors-33) + - [Contributors](#contributors-34) - [v0.13.0](#v0130) - [Wider VFIO device support](#wider-vfio-device-support) - [Improved huge page support](#improved-huge-page-support) @@ -271,13 +283,13 @@ - [VHD disk image support](#vhd-disk-image-support) - [Improved Virtio device threading](#improved-virtio-device-threading) - [Clean shutdown support via synthetic power button](#clean-shutdown-support-via-synthetic-power-button) - - [Contributors](#contributors-34) + - [Contributors](#contributors-35) - [v0.12.0](#v0120) - [ARM64 enhancements](#arm64-enhancements) - [Removal of `vhost-user-net` and `vhost-user-block` self spawning](#removal-of-vhost-user-net-and-vhost-user-block-self-spawning) - [Migration of `vhost-user-fs` backend](#migration-of-vhost-user-fs-backend) - [Enhanced "info" API](#enhanced-info-api) - - [Contributors](#contributors-35) + - [Contributors](#contributors-36) - [v0.11.0](#v0110) - [`io_uring` support by default for `virtio-block`](#io_uring-support-by-default-for-virtio-block) - [Windows Guest Support](#windows-guest-support) @@ -289,15 +301,15 @@ - [Default Log Level Changed](#default-log-level-changed) - [New `--balloon` Parameter Added](#new---balloon-parameter-added) - [Experimental `virtio-watchdog` Support](#experimental-virtio-watchdog-support) - - [Notable Bug Fixes](#notable-bug-fixes-32) - - [Contributors](#contributors-36) + - [Notable Bug Fixes](#notable-bug-fixes-33) + - [Contributors](#contributors-37) - [v0.10.0](#v0100) - [`virtio-block` Support for Multiple Descriptors](#virtio-block-support-for-multiple-descriptors) - [Memory 
Zones](#memory-zones) - [`Seccomp` Sandbox Improvements](#seccomp-sandbox-improvements) - [Preliminary KVM HyperV Emulation Control](#preliminary-kvm-hyperv-emulation-control) - - [Notable Bug Fixes](#notable-bug-fixes-33) - - [Contributors](#contributors-37) + - [Notable Bug Fixes](#notable-bug-fixes-34) + - [Contributors](#contributors-38) - [v0.9.0](#v090) - [`io_uring` Based Block Device Support](#io_uring-based-block-device-support) - [Block and Network Device Statistics](#block-and-network-device-statistics) @@ -310,17 +322,17 @@ - [Enhancements to ARM64 Support](#enhancements-to-arm64-support) - [Intel SGX Support](#intel-sgx-support) - [`Seccomp` Sandbox Improvements](#seccomp-sandbox-improvements-1) - - [Notable Bug Fixes](#notable-bug-fixes-34) - - [Contributors](#contributors-38) + - [Notable Bug Fixes](#notable-bug-fixes-35) + - [Contributors](#contributors-39) - [v0.8.0](#v080) - [Experimental Snapshot and Restore Support](#experimental-snapshot-and-restore-support) - [Experimental ARM64 Support](#experimental-arm64-support) - [Support for Using 5-level Paging in Guests](#support-for-using-5-level-paging-in-guests) - [Virtio Device Interrupt Suppression for Network Devices](#virtio-device-interrupt-suppression-for-network-devices) - [`vhost_user_fs` Improvements](#vhost_user_fs-improvements) - - [Notable Bug Fixes](#notable-bug-fixes-35) + - [Notable Bug Fixes](#notable-bug-fixes-36) - [Command Line and API Changes](#command-line-and-api-changes) - - [Contributors](#contributors-39) + - [Contributors](#contributors-40) - [v0.7.0](#v070) - [Block, Network, Persistent Memory (PMEM), VirtioFS and Vsock hotplug](#block-network-persistent-memory-pmem-virtiofs-and-vsock-hotplug) - [Alternative `libc` Support](#alternative-libc-support) @@ -330,14 +342,14 @@ - [`Seccomp` Sandboxing](#seccomp-sandboxing) - [Updated Distribution Support](#updated-distribution-support) - [Command Line and API Changes](#command-line-and-api-changes-1) - - [Contributors](#contributors-40) + - [Contributors](#contributors-41) - [v0.6.0](#v060) - [Directly Assigned Devices Hotplug](#directly-assigned-devices-hotplug) - [Shared Filesystem Improvements](#shared-filesystem-improvements) - [Block and Networking IO Self Offloading](#block-and-networking-io-self-offloading) - [Command Line Interface](#command-line-interface) - [PVH Boot](#pvh-boot) - - [Contributors](#contributors-41) + - [Contributors](#contributors-42) - [v0.5.1](#v051) - [v0.5.0](#v050) - [Virtual Machine Dynamic Resizing](#virtual-machine-dynamic-resizing) @@ -345,7 +357,7 @@ - [New Interrupt Management Framework](#new-interrupt-management-framework) - [Development Tools](#development-tools) - [Kata Containers Integration](#kata-containers-integration) - - [Contributors](#contributors-42) + - [Contributors](#contributors-43) - [v0.4.0](#v040) - [Dynamic virtual CPUs addition](#dynamic-virtual-cpus-addition) - [Programmatic firmware tables generation](#programmatic-firmware-tables-generation) @@ -354,7 +366,7 @@ - [Userspace IOAPIC by default](#userspace-ioapic-by-default) - [PCI BAR reprogramming](#pci-bar-reprogramming) - [New `cloud-hypervisor` organization](#new-cloud-hypervisor-organization) - - [Contributors](#contributors-43) + - [Contributors](#contributors-44) - [v0.3.0](#v030) - [Block device offloading](#block-device-offloading) - [Network device backend](#network-device-backend) @@ -381,6 +393,83 @@ - [Unit testing](#unit-testing) - [Integration tests parallelization](#integration-tests-parallelization) +# v48.0 + +This release has 
been tracked in [v48.0 +group](https://github.com/orgs/cloud-hypervisor/projects/6/views/4?filterQuery=release%3A%22Release+48%22) +of our [roadmap project](https://github.com/orgs/cloud-hypervisor/projects/6/). + +### Experimental `fw_cfg` Device Support + +This feature enables passing configuration data and files, such as VM +boot configurations (kernel, kernel cmdline, e820 memory map, and ACPI +tables), from the host to the guest. (#7117) + +### Experimental `ivshmem` Device Support + +Support for inter-VM shared memory has been added. For more information, +please refer to the [ivshmem documentation](docs/ivshmem.md). (#6703) + +### Firmware Boot Support on `riscv64` + +In addition to direct kernel boot, firmware boot support has been added +on `riscv64` hosts. (#7249) + +### Increased vCPU Limit on x86_64/kvm + +The maximum number of supported vCPUs on x86_64 hosts using KVM has been +raised from 254 to 8192. (#7299) + +### Improved Block Performance with Small Block Sizes + +Performance for `virtio-blk` with small block sizes (16KB and below) +is enhanced via submitting async IO requests in batches. (#7146) + +### Faster VM Pause Operation + +The VM pause operation now is significantly faster particularly for VMs +with a large number of vCPUs. (#7290) + +### Updated Documentation on Windows Guest Support + +Our Windows documentation now includes instructions to run Windows 11 +guests, in addition to Windows Server guests. (#7218) + +### Policy on AI Generated Code + +We will decline any contributions known to contain contents generated or +derived from using Large Language Models (LLMs). Details can be found +in our [contributing documentation](CONTRIBUTING.md). (#7162) + +### Removed SGX Support + +The SGX support has been removed, as announced in the deprecation notice two +release cycles ago. (#7093) + +### Notable Bug Fixes + +* Seccomp filter fixes with glibc v2.42 (#7327) +* Various fixes related to (#7331, #7334, #7335) + +### Contributors + +Many thanks to everyone who has contributed to our release: + +* Alex Orozco +* Alyssa Ross +* Anirudh Rayabharam +* Bo Chen +* Demi Marie Obenour +* Lucas Grosche +* Muminul Islam +* Oliver Anderson +* Peter Oskolkov +* Philipp Schuster +* Ruoqing He +* Shubham Chakrawar +* Songqian Li +* Wei Liu + # v47.0 This release has been tracked in [v47.0 From d002090afe026be92de03f799fd715e832854006 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Sep 2025 23:01:52 +0000 Subject: [PATCH 179/294] build: Bump anyhow from 1.0.98 to 1.0.99 Bumps [anyhow](https://github.com/dtolnay/anyhow) from 1.0.98 to 1.0.99. - [Release notes](https://github.com/dtolnay/anyhow/releases) - [Commits](https://github.com/dtolnay/anyhow/compare/1.0.98...1.0.99) --- updated-dependencies: - dependency-name: anyhow dependency-version: 1.0.99 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d2f13bfd58..14d441e388 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -85,9 +85,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.98" +version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" [[package]] name = "api_client" diff --git a/Cargo.toml b/Cargo.toml index f154330808..e68233b059 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -136,7 +136,7 @@ serde_json = "1.0.143" serde_with = { version = "3.14.0", default-features = false } # other crates -anyhow = "1.0.98" +anyhow = "1.0.99" bitflags = "2.9.4" byteorder = "1.5.0" cfg-if = "1.0.0" From 8fd852304217be179e5581743e39219ba3f5c31b Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 27 Jun 2025 13:20:10 +0200 Subject: [PATCH 180/294] vmm: add info! message on graceful shutdown This message makes it easier to confirm a successful shutdown when scanning logs during development and in production. If the message is missing and the last log entry is not an error, the most likely cause is a livelock (e.g. contention on a lock) introduced while developing new Cloud Hypervisor features. Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- src/main.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 7bdcbfac8b..971d743142 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,7 +15,7 @@ use std::{env, io}; use clap::{Arg, ArgAction, ArgGroup, ArgMatches, Command}; use event_monitor::event; use libc::EFD_NONBLOCK; -use log::{LevelFilter, error, warn}; +use log::{LevelFilter, error, info, warn}; use option_parser::OptionParser; use seccompiler::SeccompAction; use signal_hook::consts::SIGSYS; @@ -891,6 +891,7 @@ fn main() { let exit_code = match start_vmm(cmd_arguments) { Ok(path) => { path.map(|s| std::fs::remove_file(s).ok()); + info!("Cloud Hypervisor exited successfully"); 0 } Err(top_error) => { From ad9a1878bfb79a9a64607bfa613ae916169e1bc2 Mon Sep 17 00:00:00 2001 From: Muminul Islam Date: Sat, 13 Sep 2025 00:35:49 -0700 Subject: [PATCH 181/294] hypervisor: remove invalid url Reference link to set_device_attribut for aarch64 is invalid. Looks like the code browsing does not have the reference anymore. Signed-off-by: Muminul Islam --- hypervisor/src/kvm/aarch64/gic/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hypervisor/src/kvm/aarch64/gic/mod.rs b/hypervisor/src/kvm/aarch64/gic/mod.rs index cf4619bd7c..8bb79be2b5 100644 --- a/hypervisor/src/kvm/aarch64/gic/mod.rs +++ b/hypervisor/src/kvm/aarch64/gic/mod.rs @@ -216,9 +216,7 @@ impl KvmGicV3Its { 0, )?; - /* Finalize the GIC. - * See https://code.woboq.org/linux/linux/virt/kvm/arm/vgic/vgic-kvm-device.c.html#211. - */ + // Finalize the GIC. Self::set_device_attribute( &self.device, kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, From c2cfa58221209948b8053994e7b68a36ae2cbd41 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 15 Aug 2025 10:46:44 +0200 Subject: [PATCH 182/294] ci: remove irrelevant CI for this fork Remove irrelevant/annoying CI here to accelerate development. 
--- .github/workflows/audit.yaml | 16 ---- .github/workflows/dco.yaml | 20 ---- .github/workflows/docker-image.yaml | 65 ------------- .github/workflows/formatting.yaml | 1 - .github/workflows/fuzz-build.yaml | 32 ------- .github/workflows/gitlint.yaml | 25 ----- .github/workflows/hadolint.yaml | 25 ----- .github/workflows/integration-arm64.yaml | 54 ----------- .github/workflows/lychee.yaml | 45 --------- .github/workflows/package-consistency.yaml | 32 ------- .../workflows/preview-riscv64-modules.yaml | 39 -------- .github/workflows/quality.yaml | 6 -- .github/workflows/release.yaml | 95 ------------------- 13 files changed, 455 deletions(-) delete mode 100644 .github/workflows/audit.yaml delete mode 100644 .github/workflows/dco.yaml delete mode 100644 .github/workflows/docker-image.yaml delete mode 100644 .github/workflows/fuzz-build.yaml delete mode 100644 .github/workflows/gitlint.yaml delete mode 100644 .github/workflows/hadolint.yaml delete mode 100644 .github/workflows/integration-arm64.yaml delete mode 100644 .github/workflows/lychee.yaml delete mode 100644 .github/workflows/package-consistency.yaml delete mode 100644 .github/workflows/preview-riscv64-modules.yaml delete mode 100644 .github/workflows/release.yaml diff --git a/.github/workflows/audit.yaml b/.github/workflows/audit.yaml deleted file mode 100644 index cfc21696e1..0000000000 --- a/.github/workflows/audit.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: Cloud Hypervisor Dependency Audit -on: - pull_request: - paths: - - '**/Cargo.toml' - - '**/Cargo.lock' - -jobs: - security_audit: - name: Audit - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v5 - - uses: actions-rust-lang/audit@v1 - with: - token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/dco.yaml b/.github/workflows/dco.yaml deleted file mode 100644 index daf21315e0..0000000000 --- a/.github/workflows/dco.yaml +++ /dev/null @@ -1,20 +0,0 @@ -name: DCO -on: [pull_request, merge_group] - -jobs: - check: - name: DCO Check ("Signed-Off-By") - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v5 - - name: Set up Python 3.x - uses: actions/setup-python@v6 - with: - python-version: '3.x' - - name: Check DCO - if: ${{ github.event_name == 'pull_request' }} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - pip3 install -U dco-check - dco-check -e "49699333+dependabot[bot]@users.noreply.github.com" diff --git a/.github/workflows/docker-image.yaml b/.github/workflows/docker-image.yaml deleted file mode 100644 index a026eac2c6..0000000000 --- a/.github/workflows/docker-image.yaml +++ /dev/null @@ -1,65 +0,0 @@ -name: Cloud Hypervisor's Docker image update -on: - push: - branches: main - paths: resources/Dockerfile - pull_request: - paths: resources/Dockerfile -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -jobs: - main: - runs-on: ubuntu-latest - steps: - - name: Code checkout - uses: actions/checkout@v5 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Login to ghcr - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # generate Docker tags based on the following events/attributes - tags: | - 
type=raw,value=20250815-0 - type=sha - - - name: Build and push - if: ${{ github.event_name == 'push' }} - uses: docker/build-push-action@v6 - with: - file: ./resources/Dockerfile - platforms: linux/amd64,linux/arm64 - push: true - tags: ${{ steps.meta.outputs.tags }} - - - name: Build only - if: ${{ github.event_name == 'pull_request' }} - uses: docker/build-push-action@v6 - with: - file: ./resources/Dockerfile - platforms: linux/amd64,linux/arm64 - tags: ${{ steps.meta.outputs.tags }} - - - name: Image digest - run: echo ${{ steps.docker_build.outputs.digest }} diff --git a/.github/workflows/formatting.yaml b/.github/workflows/formatting.yaml index 75e4492559..37a0b3e6b5 100644 --- a/.github/workflows/formatting.yaml +++ b/.github/workflows/formatting.yaml @@ -14,7 +14,6 @@ jobs: - nightly target: - x86_64-unknown-linux-gnu - - aarch64-unknown-linux-musl env: RUSTFLAGS: -D warnings steps: diff --git a/.github/workflows/fuzz-build.yaml b/.github/workflows/fuzz-build.yaml deleted file mode 100644 index 427189b01e..0000000000 --- a/.github/workflows/fuzz-build.yaml +++ /dev/null @@ -1,32 +0,0 @@ -name: Cloud Hypervisor Cargo Fuzz Build -on: [pull_request, merge_group] -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - build: - name: Cargo Fuzz Build - runs-on: ubuntu-latest - strategy: - matrix: - rust: - - nightly - target: - - x86_64-unknown-linux-gnu - env: - RUSTFLAGS: -D warnings - steps: - - name: Code checkout - uses: actions/checkout@v5 - - name: Install Rust toolchain (${{ matrix.rust }}) - uses: dtolnay/rust-toolchain@stable - with: - toolchain: ${{ matrix.rust }} - target: ${{ matrix.target }} - - name: Install Cargo fuzz - run: cargo install cargo-fuzz - - name: Fuzz Build - run: cargo fuzz build - - name: Fuzz Check - run: cargo fuzz check diff --git a/.github/workflows/gitlint.yaml b/.github/workflows/gitlint.yaml deleted file mode 100644 index 7c3c4f7e45..0000000000 --- a/.github/workflows/gitlint.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: Commit messages check -on: - pull_request: - -jobs: - gitlint: - name: Check commit messages - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v5 - with: - ref: ${{ github.event.pull_request.head.sha }} - fetch-depth: 0 - - name: Set up Python 3.10 - uses: actions/setup-python@v6 - with: - python-version: "3.10" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install --upgrade gitlint - - name: Lint git commit messages - run: | - gitlint --commits origin/$GITHUB_BASE_REF.. 
diff --git a/.github/workflows/hadolint.yaml b/.github/workflows/hadolint.yaml deleted file mode 100644 index 641d911c0c..0000000000 --- a/.github/workflows/hadolint.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: Lint Dockerfile -on: - push: - paths: - - resources/Dockerfile - pull_request: - paths: - - resources/Dockerfile - -jobs: - hadolint: - name: Run Hadolint Dockerfile Linter - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v5 - - - name: Lint Dockerfile - uses: hadolint/hadolint-action@master - with: - dockerfile: ./resources/Dockerfile - format: tty - no-fail: false - verbose: true - failure-threshold: info diff --git a/.github/workflows/integration-arm64.yaml b/.github/workflows/integration-arm64.yaml deleted file mode 100644 index 41a7bc824a..0000000000 --- a/.github/workflows/integration-arm64.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: Cloud Hypervisor Tests (ARM64) -on: [pull_request, merge_group] -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - build: - timeout-minutes: 120 - name: Tests (ARM64) - runs-on: bookworm-arm64 - steps: - - name: Fix workspace permissions - run: sudo chown -R runner:runner ${GITHUB_WORKSPACE} - - name: Code checkout - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - name: Run unit tests (musl) - run: scripts/dev_cli.sh tests --unit --libc musl - - name: Load openvswitch module - run: sudo modprobe openvswitch - - name: Run integration tests (musl) - timeout-minutes: 60 - run: scripts/dev_cli.sh tests --integration --libc musl - - name: Install Azure CLI - if: ${{ github.event_name != 'pull_request' }} - run: | - sudo apt install -y ca-certificates curl apt-transport-https lsb-release gnupg - curl -sL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/microsoft.gpg > /dev/null - echo "deb [arch=arm64] https://packages.microsoft.com/repos/azure-cli/ bookworm main" | sudo tee /etc/apt/sources.list.d/azure-cli.list - sudo apt update - sudo apt install -y azure-cli - - name: Download Windows image - if: ${{ github.event_name != 'pull_request' }} - shell: bash - run: | - IMG_BASENAME=windows-11-iot-enterprise-aarch64.raw - IMG_PATH=$HOME/workloads/$IMG_BASENAME - IMG_GZ_PATH=$HOME/workloads/$IMG_BASENAME.gz - IMG_GZ_BLOB_NAME=windows-11-iot-enterprise-aarch64-9-min.raw.gz - cp "scripts/$IMG_BASENAME.sha1" "$HOME/workloads/" - pushd "$HOME/workloads" - if sha1sum "$IMG_BASENAME.sha1" --check; then - exit - fi - popd - mkdir -p "$HOME/workloads" - az storage blob download --container-name private-images --file "$IMG_GZ_PATH" --name "$IMG_GZ_BLOB_NAME" --connection-string "${{ secrets.CH_PRIVATE_IMAGES }}" - gzip -d $IMG_GZ_PATH - - name: Run Windows guest integration tests - if: ${{ github.event_name != 'pull_request' }} - timeout-minutes: 30 - run: scripts/dev_cli.sh tests --integration-windows --libc musl diff --git a/.github/workflows/lychee.yaml b/.github/workflows/lychee.yaml deleted file mode 100644 index 191d53e6a3..0000000000 --- a/.github/workflows/lychee.yaml +++ /dev/null @@ -1,45 +0,0 @@ -name: Link Check (lychee) -on: pull_request -jobs: - link_check: - name: Link Check - runs-on: ubuntu-latest - steps: - - name: Code checkout - uses: actions/checkout@v5 - with: - # Fetch the entire history so git diff can compare against the base branch - fetch-depth: 0 - - name: Get changed files in PR - id: changed-files - uses: tj-actions/changed-files@v46 # Using a dedicated action for robustness - with: - # Compare the 
HEAD of the PR with the merge-base (where the PR branches off) - base_sha: ${{ github.event.pull_request.base.sha }} - - # NEW STEP: Print all changed-files outputs for verification - - name: Verify Changed Files - run: | - echo "--- tj-actions/changed-files Outputs ---" - echo "any_changed: ${{ steps.changed-files.outputs.any_changed }}" - echo "all_changed_files: ${{ steps.changed-files.outputs.all_changed_files }}" - echo "added_files: ${{ steps.changed-files.outputs.added_files }}" - echo "modified_files: ${{ steps.changed-files.outputs.modified_files }}" - echo "deleted_files: ${{ steps.changed-files.outputs.deleted_files }}" - echo "renamed_files: ${{ steps.changed-files.outputs.renamed_files }}" - echo "----------------------------------------" - # This will also show if the all_changed_files string is empty or not - if [ -n "${{ steps.changed-files.outputs.all_changed_files }}" ]; then - echo "Detected changes: all_changed_files output is NOT empty." - else - echo "No changes detected: all_changed_files output IS empty." - fi - - name: Link Availability Check (Diff Only) - # MODIFIED: Only run lychee if the 'all_changed_files' output is not an empty string - if: ${{ steps.changed-files.outputs.all_changed_files != '' }} - uses: lycheeverse/lychee-action@master - with: - # Pass the space-separated list of changed files to lychee - args: --verbose --config .lychee.toml ${{ steps.changed-files.outputs.all_changed_files }} - failIfEmpty: false - fail: true \ No newline at end of file diff --git a/.github/workflows/package-consistency.yaml b/.github/workflows/package-consistency.yaml deleted file mode 100644 index 719aa3d8df..0000000000 --- a/.github/workflows/package-consistency.yaml +++ /dev/null @@ -1,32 +0,0 @@ -name: Cloud Hypervisor Consistency -on: [pull_request, merge_group] -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - build: - name: Rust VMM Consistency Check - runs-on: ubuntu-latest - steps: - - name: Code checkout - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Install dependencies - run: sudo apt install -y python3 - - - name: Install Rust toolchain stable - uses: dtolnay/rust-toolchain@stable - with: - toolchain: stable - - - name: Check Rust VMM Package Consistency of root Workspace - run: python3 scripts/package-consistency-check.py github.com/rust-vmm - - - name: Check Rust VMM Package Consistency of fuzz Workspace - run: | - pushd fuzz - python3 ../scripts/package-consistency-check.py github.com/rust-vmm - popd diff --git a/.github/workflows/preview-riscv64-modules.yaml b/.github/workflows/preview-riscv64-modules.yaml deleted file mode 100644 index 767d9779a8..0000000000 --- a/.github/workflows/preview-riscv64-modules.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: Cloud Hypervisor RISC-V 64-bit Preview -on: [pull_request, merge_group] -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - build: - name: Cargo - runs-on: riscv64-qemu-host - strategy: - fail-fast: false - matrix: - module: - - hypervisor - - arch - - vm-allocator - - devices - - steps: - - name: Code checkout - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Install Rust toolchain - run: /opt/scripts/exec-in-qemu.sh rustup default 1.88.0 - - - name: Build ${{ matrix.module }} Module (kvm) - run: /opt/scripts/exec-in-qemu.sh cargo rustc --locked -p ${{ matrix.module }} --no-default-features --features "kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W 
clippy::assertions_on_result_states - - - name: Clippy ${{ matrix.module }} Module (kvm) - run: /opt/scripts/exec-in-qemu.sh cargo clippy --locked -p ${{ matrix.module }} --no-default-features --features "kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states - - - name: Test ${{ matrix.module }} Module (kvm) - run: /opt/scripts/exec-in-qemu.sh cargo test --locked -p ${{ matrix.module }} --no-default-features --features "kvm" - - - name: Check no files were modified - run: test -z "$(git status --porcelain)" diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index f767909a27..57b33b9976 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -13,17 +13,11 @@ jobs: fail-fast: false matrix: rust: - - beta - stable target: - - aarch64-unknown-linux-gnu - - aarch64-unknown-linux-musl - x86_64-unknown-linux-gnu - - x86_64-unknown-linux-musl include: - - rust: beta - experimental: true - rust: stable experimental: false diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml deleted file mode 100644 index 4876fd5d98..0000000000 --- a/.github/workflows/release.yaml +++ /dev/null @@ -1,95 +0,0 @@ -name: Cloud Hypervisor Release -on: [create, merge_group] -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} - cancel-in-progress: true -env: - GITHUB_TOKEN: ${{ github.token }} - -jobs: - release: - if: (github.event_name == 'create' && github.event.ref_type == 'tag') || github.event_name == 'merge_group' - name: Release ${{ matrix.platform.target }} - strategy: - fail-fast: false - matrix: - platform: - - target: x86_64-unknown-linux-gnu - args: --all --release --features mshv - name_ch: cloud-hypervisor - name_ch_remote: ch-remote - - target: x86_64-unknown-linux-musl - args: --all --release --features mshv - name_ch: cloud-hypervisor-static - name_ch_remote: ch-remote-static - - target: aarch64-unknown-linux-musl - args: --all --release - name_ch: cloud-hypervisor-static-aarch64 - name_ch_remote: ch-remote-static-aarch64 - runs-on: ubuntu-latest - steps: - - name: Code checkout - uses: actions/checkout@v5 - - name: Install musl-gcc - if: contains(matrix.platform.target, 'musl') - run: sudo apt install -y musl-tools - - name: Create release directory - if: | - github.event_name == 'create' && github.event.ref_type == 'tag' && - matrix.platform.target == 'x86_64-unknown-linux-gnu' - run: rsync -rv --exclude=.git . 
../cloud-hypervisor-${{ github.event.ref }} - - name: Build ${{ matrix.platform.target }} - uses: houseabsolute/actions-rust-cross@v1 - with: - command: build - target: ${{ matrix.platform.target }} - args: ${{ matrix.platform.args }} - strip: true - toolchain: "1.88.0" - - name: Copy Release Binaries - if: github.event_name == 'create' && github.event.ref_type == 'tag' - shell: bash - run: | - cp target/${{ matrix.platform.target }}/release/cloud-hypervisor ./${{ matrix.platform.name_ch }} - cp target/${{ matrix.platform.target }}/release/ch-remote ./${{ matrix.platform.name_ch_remote }} - - name: Upload Release Artifacts - if: github.event_name == 'create' && github.event.ref_type == 'tag' - uses: actions/upload-artifact@v4 - with: - name: Artifacts for ${{ matrix.platform.target }} - path: | - ./${{ matrix.platform.name_ch }} - ./${{ matrix.platform.name_ch_remote }} - - name: Vendor - if: | - github.event_name == 'create' && github.event.ref_type == 'tag' && - matrix.platform.target == 'x86_64-unknown-linux-gnu' - working-directory: ../cloud-hypervisor-${{ github.event.ref }} - run: | - mkdir ../vendor-cargo-home - export CARGO_HOME=$(realpath ../vendor-cargo-home) - mkdir .cargo - cargo vendor > .cargo/config.toml - - name: Create vendored source archive - if: | - github.event_name == 'create' && github.event.ref_type == 'tag' && - matrix.platform.target == 'x86_64-unknown-linux-gnu' - run: tar cJf cloud-hypervisor-${{ github.event.ref }}.tar.xz ../cloud-hypervisor-${{ github.event.ref }} - - name: Upload cloud-hypervisor vendored source archive - if: | - github.event_name == 'create' && github.event.ref_type == 'tag' && - matrix.platform.target == 'x86_64-unknown-linux-gnu' - id: upload-release-cloud-hypervisor-vendored-sources - uses: actions/upload-artifact@v4 - with: - path: cloud-hypervisor-${{ github.event.ref }}.tar.xz - name: cloud-hypervisor-${{ github.event.ref }}.tar.xz - - name: Create GitHub Release - if: github.event_name == 'create' && github.event.ref_type == 'tag' - uses: softprops/action-gh-release@v2 - with: - draft: true - files: | - ./${{ matrix.platform.name_ch }} - ./${{ matrix.platform.name_ch_remote }} - ./cloud-hypervisor-${{ github.event.ref }}.tar.xz From 61cf32660d95bd686ddafb312c50ee728cd68f39 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Wed, 27 Aug 2025 16:37:51 +0200 Subject: [PATCH 183/294] github: add PR template --- .github/pull_request_template.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000000..ce7eb0eb16 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,28 @@ +## About + + + + +## Checklist (for Author) + +- [ ] [libvirt-tests](https://github.com/cyberus-technology/libvirt-tests) + pipeline succeeded (currently this must be done manually locally) +- [ ] PR associated with + [ticket](https://github.com/cobaltcore-dev/cobaltcore/issues?q=is%3Aissue%20state%3Aopen%20label%3Acyberus%2Ccyberus-maybe) + +## Hints for Reviewers + + + + +## Steps to Undraft (if draft) + + + From 530ebab2a7a0d3486948facbdbf4e50386d610bf Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 27 Jun 2025 13:20:10 +0200 Subject: [PATCH 184/294] misc: live-migration with virtio-net devices with network fds + improve debuggability This is a list of squashed commits. It will be replaced soon by a more graceful git history. 
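The new HTTP handlers in this series all follow the same rule for the file descriptors that arrive as SCM_RIGHTS ancillary data on the API socket: they come in as one flat list and are handed out, in order, to each virtio-net config according to how many FDs that config declares. A minimal sketch of that rule, with hypothetical names that are not part of this series:

```rust
use std::os::fd::RawFd;

// Hypothetical, simplified version of the FD distribution rule: ancillary FDs
// arrive as a single flat list and are drained, in order, into each config
// according to the number of FDs it expects.
fn distribute_fds(
    mut fds: Vec<RawFd>,
    wanted: &[(&str, usize)],
) -> Result<Vec<(String, Vec<RawFd>)>, String> {
    let expected: usize = wanted.iter().map(|(_, n)| *n).sum();
    if fds.len() != expected {
        return Err(format!("expected {expected} FDs, received {}", fds.len()));
    }
    let mut assigned = Vec::new();
    for (id, n) in wanted {
        // Take the next `n` FDs for this device, preserving their order.
        assigned.push((id.to_string(), fds.drain(..*n).collect()));
    }
    debug_assert!(fds.is_empty());
    Ok(assigned)
}

fn main() {
    // Two devices, two queue pairs each, four FDs in total.
    let result = distribute_fds(vec![10, 11, 12, 13], &[("_net0", 2), ("_net1", 2)]);
    assert_eq!(
        result.unwrap(),
        vec![
            ("_net0".to_string(), vec![10, 11]),
            ("_net1".to_string(), vec![12, 13])
        ]
    );
}
```

The real helper introduced below, `apply_new_fds_to_cfg`, implements the same idea but operates on the concrete config types through callbacks so it can be shared between the create, add-net, restore and receive-migration handlers.
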
Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- devices/src/ioapic.rs | 4 +- net_util/src/open_tap.rs | 9 +- net_util/src/tap.rs | 16 +- src/bin/ch-remote.rs | 5 + virtio-devices/src/net.rs | 24 +- .../src/transport/pci_common_config.rs | 2 +- vmm/Cargo.toml | 2 + vmm/src/api/http/http_endpoint.rs | 237 ++++++++++++++---- vmm/src/api/mod.rs | 4 +- vmm/src/config.rs | 81 ++++-- vmm/src/device_manager.rs | 31 ++- vmm/src/lib.rs | 39 ++- vmm/src/vm_config.rs | 28 +-- 13 files changed, 370 insertions(+), 112 deletions(-) diff --git a/devices/src/ioapic.rs b/devices/src/ioapic.rs index 97932f016d..f8214e96c5 100644 --- a/devices/src/ioapic.rs +++ b/devices/src/ioapic.rs @@ -171,7 +171,7 @@ impl BusDevice for Ioapic { return None; } - debug!("IOAPIC_W @ offset 0x{:x}", offset); + trace!("IOAPIC_W @ offset 0x{:x}", offset); let value = LittleEndian::read_u32(data); @@ -249,7 +249,7 @@ impl Ioapic { } fn ioapic_write(&mut self, val: u32) { - debug!("IOAPIC_W reg 0x{:x}, val 0x{:x}", self.reg_sel, val); + trace!("IOAPIC_W reg 0x{:x}, val 0x{:x}", self.reg_sel, val); match self.reg_sel as u8 { IOAPIC_REG_VERSION => { diff --git a/net_util/src/open_tap.rs b/net_util/src/open_tap.rs index 61e763ba20..95924d6df9 100644 --- a/net_util/src/open_tap.rs +++ b/net_util/src/open_tap.rs @@ -76,7 +76,14 @@ fn open_tap_rx_q_0( let tap = match if_name { Some(name) => Tap::open_named(name, num_rx_q, flags).map_err(Error::TapOpen)?, // Create a new Tap device in Linux, if none was specified. - None => Tap::new(num_rx_q).map_err(Error::TapOpen)?, + None => { + let tap = Tap::new(num_rx_q).map_err(Error::TapOpen)?; + log::info!( + "Created tap device: name={}, num_rx_q={num_rx_q}", + tap.if_name_as_str() + ); + tap + } }; // Don't overwrite ip configuration of existing interfaces: if !tap_exists { diff --git a/net_util/src/tap.rs b/net_util/src/tap.rs index 2916e66cf6..93e1b96a0e 100644 --- a/net_util/src/tap.rs +++ b/net_util/src/tap.rs @@ -65,6 +65,16 @@ pub struct Tap { if_name: Vec, } +impl Drop for Tap { + fn drop(&mut self) { + debug!( + "Dropping Tap: if_name={}, FD={}", + self.if_name_as_str(), + self.tap_file.as_raw_fd() + ); + } +} + impl PartialEq for Tap { fn eq(&self, other: &Tap) -> bool { self.if_name == other.if_name @@ -129,6 +139,9 @@ fn ipv6_mask_to_prefix(mask: Ipv6Addr) -> Result { } impl Tap { + /// The default naming scheme for Tap devices that are created by Cloud Hypervisor. + pub const DEFAULT_NAME_SCHEME: &'static str = "vmtap%d"; + /// # Safety /// The caller should ensure to pass a valid file descriptor and valid /// arguments for the `ioctl()` syscall. @@ -183,6 +196,7 @@ impl Tap { if fd < 0 { return Err(Error::OpenTun(IoError::last_os_error())); } + debug!("Opening Tap device with given name: ifname={if_name}, fd={fd}"); // SAFETY: We just checked that the fd is valid. let tuntap = unsafe { File::from_raw_fd(fd) }; @@ -236,7 +250,7 @@ impl Tap { /// Create a new tap interface. 
pub fn new(num_queue_pairs: usize) -> Result { - Self::open_named("vmtap%d", num_queue_pairs, None) + Self::open_named(Self::DEFAULT_NAME_SCHEME, num_queue_pairs, None) } pub fn from_tap_fd(fd: RawFd, num_queue_pairs: usize) -> Result { diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs index 803ffc7ee9..75b3159be1 100644 --- a/src/bin/ch-remote.rs +++ b/src/bin/ch-remote.rs @@ -877,6 +877,10 @@ fn coredump_config(destination_url: &str) -> String { fn receive_migration_data(url: &str) -> String { let receive_migration_data = vmm::api::VmReceiveMigrationData { receiver_url: url.to_owned(), + // Only FDs transmitted via an SCM_RIGHTS UNIX Domain Socket message + // are valid. Transmitting specific FD nums via the HTTP API is + // almost always invalid. + net_fds: None, }; serde_json::to_string(&receive_migration_data).unwrap() @@ -997,6 +1001,7 @@ fn get_cli_commands_sorted() -> Box<[Command]> { .arg( Arg::new("receive_migration_config") .index(1) + // Live migration with net_fds not supported in ch-remote. .help(""), ), Command::new("remove-device") diff --git a/virtio-devices/src/net.rs b/virtio-devices/src/net.rs index bbe0a8b37f..2c4e085ca2 100644 --- a/virtio-devices/src/net.rs +++ b/virtio-devices/src/net.rs @@ -255,9 +255,9 @@ impl NetEpollHandler { || !self.driver_awake { self.signal_used_queue(self.queue_index_base)?; - debug!("Signalling RX queue"); + trace!("Signalling RX queue"); } else { - debug!("Not signalling RX queue"); + trace!("Not signalling RX queue"); } Ok(()) } @@ -613,11 +613,12 @@ impl Net { for fd in fds.iter() { // Duplicate so that it can survive reboots // SAFETY: FFI call to dup. Trivially safe. - let fd = unsafe { libc::dup(*fd) }; - if fd < 0 { + let fd_duped = unsafe { libc::dup(*fd) }; + if fd_duped < 0 { return Err(Error::DuplicateTapFd(std::io::Error::last_os_error())); } - let tap = Tap::from_tap_fd(fd, num_queue_pairs).map_err(Error::TapError)?; + debug!("dup'ed fd {fd} => {fd_duped} for virtio-net device {id}"); + let tap = Tap::from_tap_fd(fd_duped, num_queue_pairs).map_err(Error::TapError)?; taps.push(tap); } @@ -661,6 +662,19 @@ impl Net { impl Drop for Net { fn drop(&mut self) { + // Get a comma-separated list of the interface names of the tap devices + // associated with this network device. + let ifnames_str = self + .taps + .iter() + .map(|tap| tap.if_name_as_str()) + .collect::>(); + let ifnames_str = ifnames_str.join(","); + debug!( + "virtio-net device closed: id={}, ifnames=[{ifnames_str}]", + self.id + ); + if let Some(kill_evt) = self.common.kill_evt.take() { // Ignore the result because there is nothing we can do about it. 
let _ = kill_evt.write(1); diff --git a/virtio-devices/src/transport/pci_common_config.rs b/virtio-devices/src/transport/pci_common_config.rs index 549453a5d6..37790133a6 100644 --- a/virtio-devices/src/transport/pci_common_config.rs +++ b/virtio-devices/src/transport/pci_common_config.rs @@ -234,7 +234,7 @@ impl VirtioPciCommonConfig { } fn read_common_config_word(&self, offset: u64, queues: &[Queue]) -> u16 { - debug!("read_common_config_word: offset 0x{:x}", offset); + trace!("read_common_config_word: offset 0x{:x}", offset); match offset { 0x10 => self.msix_config.load(Ordering::Acquire), 0x12 => queues.len() as u16, // num_queues diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index b931193a19..25683e83ff 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -58,6 +58,8 @@ landlock = "0.4.2" libc = { workspace = true } linux-loader = { workspace = true, features = ["bzimage", "elf", "pe"] } log = { workspace = true } +# Special fork of micro_http that combines HTTP traffic over a UNIX domain +# socket with UNIX' SCM_RIGHTS mechanism for transferring file descriptors. micro_http = { git = "https://github.com/firecracker-microvm/micro-http", branch = "main" } mshv-bindings = { workspace = true, features = [ "fam-wrappers", diff --git a/vmm/src/api/http/http_endpoint.rs b/vmm/src/api/http/http_endpoint.rs index 5a4bf93da6..74d7e2f608 100644 --- a/vmm/src/api/http/http_endpoint.rs +++ b/vmm/src/api/http/http_endpoint.rs @@ -4,6 +4,32 @@ // SPDX-License-Identifier: Apache-2.0 // +//! # HTTP Endpoints of the Cloud Hypervisor API +//! +//! ## Special Handling for virtio-net Devices Backed by Network File Descriptors (FDs) +//! +//! Some of the HTTP handlers here implement special logic for virtio-net +//! devices **backed by network FDs** to enable live-migration, state save/ +//! resume (restore), and similar VM lifecycle events. +//! +//! The utilized mechanism requires that the control software (e.g., libvirt) +//! connects to Cloud Hypervisor by using a UNIX domain socket and that it +//! passes file descriptors (FDs) via _ancillary_ messages - specifically using +//! the `SCM_RIGHTS` mechanism described in [`cmsg(3)`]. These ancillary +//! messages must accompany the primary payload (HTTP JSON REST API in this +//! case). The Linux kernel handles these messages by `dup()`ing the referenced +//! FDs from the sender process into the receiving process, thereby ensuring +//! they are valid and usable in the target context. +//! +//! Once these valid file descriptors are received here, we integrate the actual +//! FDs into the VM's configuration, allowing the virtio-net device to +//! function correctly with its backing network resources. +//! +//! We can receive these FDs as we use a **special** HTTP library that is aware +//! of the just described mechanism. +//! +//! 
[`cmsg(3)`]: https://man7.org/linux/man-pages/man3/cmsg.3.html + use std::fs::File; use std::os::unix::io::IntoRawFd; use std::sync::mpsc::Sender; @@ -17,10 +43,10 @@ use crate::api::http::{EndpointHandler, HttpError, error_response}; use crate::api::{ AddDisk, ApiAction, ApiError, ApiRequest, NetConfig, VmAddDevice, VmAddFs, VmAddNet, VmAddPmem, VmAddUserDevice, VmAddVdpa, VmAddVsock, VmBoot, VmConfig, VmCounters, VmDelete, VmNmi, VmPause, - VmPowerButton, VmReboot, VmReceiveMigration, VmRemoveDevice, VmResize, VmResizeZone, VmRestore, - VmResume, VmSendMigration, VmShutdown, VmSnapshot, + VmPowerButton, VmReboot, VmReceiveMigration, VmReceiveMigrationData, VmRemoveDevice, VmResize, + VmResizeZone, VmRestore, VmResume, VmSendMigration, VmShutdown, VmSnapshot, }; -use crate::config::RestoreConfig; +use crate::config::{RestoreConfig, RestoredNetConfig}; use crate::cpu::Error as CpuError; use crate::vm::Error as VmError; @@ -47,11 +73,27 @@ impl EndpointHandler for VmCreate { }; if let Some(ref mut nets) = vm_config.net { - if nets.iter().any(|net| net.fds.is_some()) { - warn!("Ignoring FDs sent via the HTTP request body"); - } - for net in nets { - net.fds = None; + let mut cfgs = nets.iter_mut().collect::>(); + let cfgs = cfgs.as_mut_slice(); + + // For the VmCreate call, we do not accept FDs from the socket currently. + // This call sets all FDs to null while doing the same logging as + // similar code paths. + let res = apply_new_fds_to_cfg::( + vec![], + cfgs, + &|cfg| cfg.id.as_deref(), + &|_| 0, + &|cfg| cfg.fds.as_deref(), + &|cfg, value| { + assert!(value.is_none()); + cfg.fds = None + }, + ) + .map_err(|e| error_response(e, StatusCode::InternalServerError)); + + if let Err(e) = res { + return e; } } @@ -187,30 +229,110 @@ vm_action_put_handler_body!(VmAddUserDevice); vm_action_put_handler_body!(VmRemoveDevice); vm_action_put_handler_body!(VmResizeZone); vm_action_put_handler_body!(VmSnapshot); -vm_action_put_handler_body!(VmReceiveMigration); vm_action_put_handler_body!(VmSendMigration); #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] vm_action_put_handler_body!(VmCoredump); +/// Applies FDs to the network config of a given device, as part of the special +/// handling for virtio-net devices backed by network FDs. +/// +/// See [module description] for more info. +/// +/// [module description]: self +fn apply_new_fds_to_cfg( + // List of new files (well, actually FDs) that back up a virtio-net device. + files: Vec, + // List of network configurations where each network can have `n` FDs. + network_cfgs: &mut [&mut T], + // Callback to return the ID. + network_cfg_extract_id: &impl Fn(&T) -> Option<&str>, + // Callback to extract the amount of expected FDs. + network_cfg_extract_num_fds_fn: &impl Fn(&T) -> usize, + // Callback to extract the FDs that are part of the type (transmitted via + // the HTTP body) + network_cfg_extract_fds_fn: &impl Fn(&T) -> Option<&[i32]>, + // Callback to set any FDs in the type to the new value. The new value + // is either `Some` with a non-empty Vector or `None`. 
+ network_cfg_replace_fds: &impl Fn(&mut T, Option>), +) -> Result<(), HttpError> { + let expected_fds: usize = network_cfgs + .iter() + .map(|cfg| network_cfg_extract_num_fds_fn(cfg)) + .sum(); + + let mut fds = files + .into_iter() + .map(|f| f.into_raw_fd()) + .collect::>(); + + if fds.len() != expected_fds { + error!( + "Number of FDs expected: {}, but received: {}", + expected_fds, + fds.len() + ); + return Err(HttpError::BadRequest); + } + + for network_cfg in network_cfgs { + let has_fds_from_http_body = network_cfg_extract_fds_fn(network_cfg).is_some(); + if has_fds_from_http_body { + // Only FDs transmitted via an SCM_RIGHTS UNIX Domain Socket message + // are valid. Any provided over the HTTP API are set to `-1` in our + // specialized serializer callbacks. + warn!( + "FD numbers were present in HTTP request body for virtio-net device {:?} but will be ignored", + network_cfg_extract_id(network_cfg) + ); + + // Reset old value in any case; if there are FDs, they are invalid. + network_cfg_replace_fds(*network_cfg, None); + } + + let n = network_cfg_extract_num_fds_fn(network_cfg); + if n > 0 { + let new_fds = fds.drain(..n).collect::>(); + log::debug!( + "Applying network FDs received via UNIX domain socket to virtio-net device: id={:?}, fds={new_fds:?}", + network_cfg_extract_id(network_cfg) + ); + network_cfg_replace_fds(*network_cfg, Some(new_fds)); + } + } + + // We checked that `fds.len() != expected_fds`; so if we panic here, we have a hard + // programming bug + assert!(fds.is_empty()); + + Ok(()) +} + impl PutHandler for VmAddNet { fn handle_request( &'static self, api_notifier: EventFd, api_sender: Sender, body: &Option, - mut files: Vec, + files: Vec, ) -> std::result::Result, HttpError> { if let Some(body) = body { let mut net_cfg: NetConfig = serde_json::from_slice(body.raw())?; - if net_cfg.fds.is_some() { - warn!("Ignoring FDs sent via the HTTP request body"); - net_cfg.fds = None; - } - if !files.is_empty() { - let fds = files.drain(..).map(|f| f.into_raw_fd()).collect(); - net_cfg.fds = Some(fds); - } + + let mut net_cfgs = [&mut net_cfg]; + let num_fds = files.len(); + apply_new_fds_to_cfg::( + files, + &mut net_cfgs, + &|cfg| cfg.id.as_deref(), + // We only have one single network here, so it wants all available FDs. + &|_| num_fds, + &|cfg| cfg.fds.as_deref(), + &|cfg, value| { + cfg.fds = value; + }, + )?; + self.send(api_notifier, api_sender, net_cfg) .map_err(HttpError::ApiError) } else { @@ -221,6 +343,44 @@ impl PutHandler for VmAddNet { impl GetHandler for VmAddNet {} +// Special Handling for virtio-net Devices Backed by Network File Descriptors +// +// See above. 
+impl PutHandler for VmReceiveMigration { + fn handle_request( + &'static self, + api_notifier: EventFd, + api_sender: Sender, + body: &Option, + files: Vec, + ) -> std::result::Result, HttpError> { + if let Some(body) = body { + let mut net_cfg: VmReceiveMigrationData = serde_json::from_slice(body.raw())?; + if let Some(cfgs) = &mut net_cfg.net_fds { + let mut cfgs = cfgs.iter_mut().collect::>(); + let cfgs = cfgs.as_mut_slice(); + apply_new_fds_to_cfg::( + files, + cfgs, + &|cfg| Some(&cfg.id), + &|cfg| cfg.num_fds, + &|cfg| cfg.fds.as_deref(), + &|cfg, value| { + cfg.fds = value; + }, + )?; + } + + self.send(api_notifier, api_sender, net_cfg) + .map_err(HttpError::ApiError) + } else { + Err(HttpError::BadRequest) + } + } +} + +impl GetHandler for VmReceiveMigration {} + impl PutHandler for VmResize { fn handle_request( &'static self, @@ -249,41 +409,32 @@ impl PutHandler for VmResize { impl GetHandler for VmResize {} +// Special handling for virtio-net devices backed by network FDs. +// See module description for more info. impl PutHandler for VmRestore { fn handle_request( &'static self, api_notifier: EventFd, api_sender: Sender, body: &Option, - mut files: Vec, + files: Vec, ) -> std::result::Result, HttpError> { if let Some(body) = body { let mut restore_cfg: RestoreConfig = serde_json::from_slice(body.raw())?; - let mut fds = Vec::new(); - if !files.is_empty() { - fds = files.drain(..).map(|f| f.into_raw_fd()).collect(); - } - let expected_fds = match restore_cfg.net_fds { - Some(ref net_fds) => net_fds.iter().map(|net| net.num_fds).sum(), - None => 0, - }; - if fds.len() != expected_fds { - error!( - "Number of FDs expected: {}, but received: {}", - expected_fds, - fds.len() - ); - return Err(HttpError::BadRequest); - } - if let Some(ref mut nets) = restore_cfg.net_fds { - warn!("Ignoring FDs sent via the HTTP request body"); - let mut start_idx = 0; - for restored_net in nets.iter_mut() { - let end_idx = start_idx + restored_net.num_fds; - restored_net.fds = Some(fds[start_idx..end_idx].to_vec()); - start_idx = end_idx; - } + if let Some(cfgs) = restore_cfg.net_fds.as_mut() { + let mut cfgs = cfgs.iter_mut().collect::>(); + let cfgs = cfgs.as_mut_slice(); + apply_new_fds_to_cfg::( + files, + cfgs, + &|cfg| Some(&cfg.id), + &|cfg| cfg.num_fds, + &|cfg| cfg.fds.as_deref(), + &|cfg, value| { + cfg.fds = value; + }, + )?; } self.send(api_notifier, api_sender, restore_cfg) diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index 5ef63ffa39..a9c075e651 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -46,7 +46,7 @@ use vmm_sys_util::eventfd::EventFd; pub use self::dbus::start_dbus_thread; pub use self::http::{start_http_fd_thread, start_http_path_thread}; use crate::Error as VmmError; -use crate::config::RestoreConfig; +use crate::config::{RestoreConfig, RestoredNetConfig}; use crate::device_tree::DeviceTree; use crate::vm::{Error as VmError, VmState}; use crate::vm_config::{ @@ -249,6 +249,8 @@ pub struct VmCoredumpData { pub struct VmReceiveMigrationData { /// URL for the reception of migration state pub receiver_url: String, + /// Map with new network FDs on the new host. 
+ pub net_fds: Option>, } #[derive(Clone, Deserialize, Serialize, Default, Debug)] diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 366b369e76..cab06cc153 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -6,6 +6,7 @@ use std::collections::{BTreeSet, HashMap}; #[cfg(feature = "ivshmem")] use std::fs; +use std::os::fd::RawFd; use std::path::PathBuf; use std::result; use std::str::FromStr; @@ -226,8 +227,8 @@ pub enum ValidationError { #[error("Number of queues to virtio_net does not match the number of input FDs")] VnetQueueFdMismatch, /// Using reserved fd - #[error("Reserved fd number (<= 2)")] - VnetReservedFd, + #[error("Reserved fd number (fd={0} <= 2)")] + VnetReservedFd(RawFd), /// Hardware checksum offload is disabled. #[error("\"offload_tso\" and \"offload_ufo\" depend on \"offload_csum\"")] NoHardwareChecksumOffload, @@ -1479,7 +1480,12 @@ impl NetConfig { if let Some(fds) = self.fds.as_ref() { for fd in fds { if *fd <= 2 { - return Err(ValidationError::VnetReservedFd); + // If we see this, most likely our live migration path for network FDs failed. + log::debug!( + "virtio-net devices {:?} unexpectedly reports invalid FD", + self.id + ); + return Err(ValidationError::VnetReservedFd(*fd)); } } } @@ -2198,29 +2204,35 @@ pub struct RestoredNetConfig { pub id: String, #[serde(default)] pub num_fds: usize, - #[serde( - default, - serialize_with = "serialize_restorednetconfig_fds", - deserialize_with = "deserialize_restorednetconfig_fds" - )] + // Special deserialize handling: + // A serialize-deserialize cycle typically happens across processes. + // The old FD is almost certainly invalid in the new process. + // One way to get actual FDs here in a new process is the `receive-migration` + // path via a UNIX Domain socket: An SCM_RIGHTS UNIX Domain Socket message + // passes new FDs to the Cloud Hypervisor process, but these FDs are handled + // in the HTTP API handler. + #[serde(default, deserialize_with = "deserialize_restorednetconfig_fds")] pub fds: Option>, } -fn serialize_restorednetconfig_fds( - x: &Option>, - s: S, -) -> std::result::Result -where - S: serde::Serializer, -{ - if let Some(x) = x { - warn!( - "'RestoredNetConfig' contains FDs that can't be serialized correctly. Serializing them as invalid FDs." - ); - let invalid_fds = vec![-1; x.len()]; - s.serialize_some(&invalid_fds) - } else { - s.serialize_none() +impl RestoredNetConfig { + // Ensure all net devices from 'VmConfig' backed by FDs have a + // corresponding 'RestoreNetConfig' with a matched 'id' and expected + // number of FDs. + pub fn validate(&self, vm_config: &VmConfig) -> ValidationResult<()> { + let found = vm_config + .net + .iter() + .flatten() + .any(|net| net.id.as_ref() == Some(&self.id)); + + if !found { + Err(ValidationError::RestoreMissingRequiredNetId( + self.id.clone(), + )) + } else { + Ok(()) + } } } @@ -2232,8 +2244,11 @@ where { let invalid_fds: Option> = Option::deserialize(d)?; if let Some(invalid_fds) = invalid_fds { - warn!( - "'RestoredNetConfig' contains FDs that can't be deserialized correctly. Deserializing them as invalid FDs." + // If the live-migration path is used properly, new FDs are passed as + // SCM_RIGHTS message. So, we don't get them from the serialized JSON + // anyway. + debug!( + "FDs in 'RestoredNetConfig' won't be deserialized as they are most likely invalid now. Deserializing them as -1." 
); Ok(Some(vec![-1; invalid_fds.len()])) } else { @@ -3101,6 +3116,8 @@ impl VmConfig { /// To use this safely, the caller must guarantee that the input /// fds are all valid. pub unsafe fn add_preserved_fds(&mut self, mut fds: Vec) { + debug!("adding preserved FDs to VM list: {fds:?}"); + if fds.is_empty() { return; } @@ -3154,7 +3171,16 @@ impl Clone for VmConfig { .preserved_fds .as_ref() // SAFETY: FFI call with valid FDs - .map(|fds| fds.iter().map(|fd| unsafe { libc::dup(*fd) }).collect()), + .map(|fds| { + fds.iter() + .map(|fd| { + // SAFETY: Trivially safe. + let fd_duped = unsafe { libc::dup(*fd) }; + warn!("Cloning VM config: duping preserved FD {fd} => {fd_duped}"); + fd_duped + }) + .collect() + }), landlock_rules: self.landlock_rules.clone(), #[cfg(feature = "ivshmem")] ivshmem: self.ivshmem.clone(), @@ -3166,6 +3192,7 @@ impl Clone for VmConfig { impl Drop for VmConfig { fn drop(&mut self) { if let Some(mut fds) = self.preserved_fds.take() { + debug!("Closing preserved FDs from VM: fds={fds:?}"); for fd in fds.drain(..) { // SAFETY: FFI call with valid FDs unsafe { libc::close(fd) }; @@ -4276,7 +4303,7 @@ mod tests { }]); assert_eq!( invalid_config.validate(), - Err(ValidationError::VnetReservedFd) + Err(ValidationError::VnetReservedFd(0)) ); let mut invalid_config = valid_config.clone(); diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 40a3d27cee..577c82eab1 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -2858,6 +2858,7 @@ impl DeviceManager { let (virtio_device, migratable_device) = if net_cfg.vhost_user { let socket = net_cfg.vhost_socket.as_ref().unwrap().clone(); + debug!("Creating virtio-net device with vhost-user backend: {socket}"); let vu_cfg = VhostUserConfig { socket, num_queues: net_cfg.num_queues, @@ -2900,6 +2901,7 @@ impl DeviceManager { let state = state_from_id(self.snapshot.as_ref(), id.as_str()) .map_err(DeviceManagerError::RestoreGetState)?; let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap { + debug!("Creating virtio-net device from Tap device: {tap_if_name}"); Arc::new(Mutex::new( virtio_devices::Net::new( id.clone(), @@ -2925,6 +2927,7 @@ impl DeviceManager { .map_err(DeviceManagerError::CreateVirtioNet)?, )) } else if let Some(fds) = &net_cfg.fds { + debug!("Creating virtio-net device from network FDs: {fds:?}"); let net = virtio_devices::Net::from_tap_fds( id.clone(), fds, @@ -2951,6 +2954,9 @@ impl DeviceManager { Arc::new(Mutex::new(net)) } else { + debug!( + "Creating virtio-net device: no ifname or FDs given, creating new Tap device" + ); Arc::new(Mutex::new( virtio_devices::Net::new( id.clone(), @@ -4401,6 +4407,10 @@ impl DeviceManager { Ok(()) } + /// Notifies the VM for a hotplug. + /// + /// This call doesn't wait for the vCPU receiving the + /// interrupt to acknowledge. pub fn notify_hotplug( &self, _notification_type: AcpiNotificationFlags, @@ -4513,8 +4523,25 @@ impl DeviceManager { .device_type(), ); match device_type { - VirtioDeviceType::Net - | VirtioDeviceType::Block + VirtioDeviceType::Net => { + let mut config = self.config.lock().unwrap(); + let nets = config.net.as_deref_mut().unwrap(); + let net_dev_cfg = nets + .iter_mut() + .find(|net| net.id.as_ref() == Some(&id)) + .unwrap(); + let fds = net_dev_cfg.fds.take().unwrap_or(Vec::new()); + + debug!("Closing preserved FDs from virtio-net device: id={id}, fds={fds:?}"); + for fd in fds { + config.preserved_fds.as_mut().unwrap().retain(|x| *x != fd); + // SAFETY: Trivially safe. 
We know the FD is not referenced any longer. + unsafe { + libc::close(fd); + } + } + } + VirtioDeviceType::Block | VirtioDeviceType::Pmem | VirtioDeviceType::Fs | VirtioDeviceType::Vsock => {} diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 76b63d10be..a811c5a4b7 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -2144,8 +2144,8 @@ impl RequestHandler for Vmm { receive_data_migration: VmReceiveMigrationData, ) -> result::Result<(), MigratableError> { info!( - "Receiving migration: receiver_url = {}", - receive_data_migration.receiver_url + "Receiving migration: receiver_url = {}, net_fds={:?}", + receive_data_migration.receiver_url, &receive_data_migration.net_fds ); // Accept the connection and get the socket @@ -2172,11 +2172,36 @@ impl RequestHandler for Vmm { Response::error().write_to(&mut socket)?; continue; } - memory_manager = Some(self.vm_receive_config( - &req, - &mut socket, - existing_memory_files.take(), - )?); + + let memory_manager_config = + self.vm_receive_config(&req, &mut socket, existing_memory_files.take())?; + memory_manager = Some(memory_manager_config); + + if let Some(ref restored_net_configs) = receive_data_migration.net_fds { + // TODO do some validaiton + //restored_net_config.validate(); + // Update VM's net configurations with new fds received for restore operation + + let mut vm_config = self.vm_config.as_mut().unwrap().lock().unwrap(); + { + for net in restored_net_configs { + for net_config in vm_config.net.iter_mut().flatten() { + // update only if the net dev is backed by FDs + if net_config.id == Some(net.id.clone()) + && net_config.fds.is_some() + { + log::error!( + "overwriting net fds: id={}, old={:?}, new={:?}", + net.id, + &net_config.fds, + &net.fds + ); + net_config.fds.clone_from(&net.fds); + } + } + } + } + }; } Command::State => { info!("State Command Received"); diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index 7a581307e7..33147092eb 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -326,11 +326,10 @@ pub struct NetConfig { pub vhost_mode: VhostMode, #[serde(default)] pub id: Option, - #[serde( - default, - serialize_with = "serialize_netconfig_fds", - deserialize_with = "deserialize_netconfig_fds" - )] + // Special deserialize handling: + // A serialize-deserialize cycle typically happens across processes. + // The old FD is almost certainly invalid in the new process. + #[serde(default, deserialize_with = "deserialize_netconfig_fds")] pub fds: Option>, #[serde(default)] pub rate_limiter_config: Option, @@ -382,29 +381,14 @@ pub fn default_netconfig_queue_size() -> u16 { DEFAULT_NET_QUEUE_SIZE } -fn serialize_netconfig_fds(x: &Option>, s: S) -> Result -where - S: serde::Serializer, -{ - if let Some(x) = x { - warn!( - "'NetConfig' contains FDs that can't be serialized correctly. Serializing them as invalid FDs." - ); - let invalid_fds = vec![-1; x.len()]; - s.serialize_some(&invalid_fds) - } else { - s.serialize_none() - } -} - fn deserialize_netconfig_fds<'de, D>(d: D) -> Result>, D::Error> where D: serde::Deserializer<'de>, { let invalid_fds: Option> = Option::deserialize(d)?; if let Some(invalid_fds) = invalid_fds { - warn!( - "'NetConfig' contains FDs that can't be deserialized correctly. Deserializing them as invalid FDs." + debug!( + "FDs in 'NetConfig' won't be deserialized as they are most likely invalid now. Deserializing them as -1." 
); Ok(Some(vec![-1; invalid_fds.len()])) } else { From dc25ae9c6e538453ea6dd8c544985f87fd24ff56 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 26 Aug 2025 16:23:24 +0200 Subject: [PATCH 185/294] vmm: logger: improve output of seconds The old output doesn't nicely align across multiple lines. # Example (old style) ``` cloud-hypervisor: 858.465660ms: DEBUG:devices/src/ioapic.rs:154 -- IOAPIC_R @ offset 0x10 cloud-hypervisor: 858.507342ms: DEBUG:devices/src/ioapic.rs:298 -- IOAPIC_R reg 0x1 cloud-hypervisor: 1.010001s: DEBUG:devices/src/ioapic.rs:174 -- IOAPIC_W @ offset 0x0 cloud-hypervisor: 1.010067s: DEBUG:devices/src/ioapic.rs:154 -- IOAPIC_R @ offset 0x10 ``` # Example (new style) ``` cloud-hypervisor: 0.731399s: DEBUG:devices/src/ioapic.rs:174 -- IOAPIC_W @ offset 0x10 cloud-hypervisor: 0.731420s: DEBUG:devices/src/ioapic.rs:252 -- IOAPIC_W reg 0x2a, val 0x10000 cloud-hypervisor: 17.026073s: INFO:vmm/src/api/mod.rs:898 -- API request event: VmInfo cloud-hypervisor: 17.263210s: INFO:vmm/src/api/mod.rs:898 -- API request event: VmInfo cloud-hypervisor: 17.547915s: INFO:vmm/src/api/mod.rs:898 -- API request event: VmInfo ``` Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- src/main.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index 971d743142..3f41ecbbec 100644 --- a/src/main.rs +++ b/src/main.rs @@ -133,12 +133,14 @@ impl log::Log for Logger { let now = std::time::Instant::now(); let duration = now.duration_since(self.start); + let duration_s = duration.as_secs_f32(); if record.file().is_some() && record.line().is_some() { write!( *(*(self.output.lock().unwrap())), - "cloud-hypervisor: {:.6?}: <{}> {}:{}:{} -- {}\r\n", - duration, + // 10: 6 decimal places + sep => 0..999s will be properly aligned + "cloud-hypervisor: {:>10.6?}s: <{}> {}:{}:{} -- {}\r\n", + duration_s, std::thread::current().name().unwrap_or("anonymous"), record.level(), record.file().unwrap(), @@ -148,8 +150,9 @@ impl log::Log for Logger { } else { write!( *(*(self.output.lock().unwrap())), - "cloud-hypervisor: {:.6?}: <{}> {}:{} -- {}\r\n", - duration, + // 10: 6 decimal places + sep => 0..999s will be properly aligned + "cloud-hypervisor: {:>10.6?}s: <{}> {}:{} -- {}\r\n", + duration_s, std::thread::current().name().unwrap_or("anonymous"), record.level(), record.target(), From 871e77dc2aecbd33caa8e0186faac555adbad388 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 26 Aug 2025 16:25:48 +0200 Subject: [PATCH 186/294] vmm: logger: improve code, remove duplication Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- src/main.rs | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/src/main.rs b/src/main.rs index 3f41ecbbec..d83a90683a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -135,30 +135,23 @@ impl log::Log for Logger { let duration = now.duration_since(self.start); let duration_s = duration.as_secs_f32(); - if record.file().is_some() && record.line().is_some() { - write!( - *(*(self.output.lock().unwrap())), - // 10: 6 decimal places + sep => 0..999s will be properly aligned - "cloud-hypervisor: {:>10.6?}s: <{}> {}:{}:{} -- {}\r\n", - duration_s, - std::thread::current().name().unwrap_or("anonymous"), - record.level(), - record.file().unwrap(), - record.line().unwrap(), - record.args() - ) + let location = if let (Some(file), Some(line)) = (record.file(), record.line()) { + format!("{}:{}", file, line) } else { - write!( - 
*(*(self.output.lock().unwrap())), - // 10: 6 decimal places + sep => 0..999s will be properly aligned - "cloud-hypervisor: {:>10.6?}s: <{}> {}:{} -- {}\r\n", - duration_s, - std::thread::current().name().unwrap_or("anonymous"), - record.level(), - record.target(), - record.args() - ) - } + record.target().to_string() + }; + + let mut out = self.output.lock().unwrap(); + write!( + &mut *out, + // 10: 6 decimal places + sep => 0..999s will be properly aligned + "cloud-hypervisor: {:>10.6?}s: <{}> {}:{} -- {}\r\n", + duration_s, + std::thread::current().name().unwrap_or("anonymous"), + record.level(), + location, + record.args(), + ) .ok(); } fn flush(&self) {} From f3b56fc497facfd6e189c12380df5c16f1e5bf20 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Wed, 27 Aug 2025 10:33:53 +0200 Subject: [PATCH 187/294] cargo: add optimized-dev profile TL;DR: Fix for long rebuilds locally when testing things. The release profile is optimized for maximum performance, sacrificing build speed. As local development and testing requires frequent rebuilds, but the dev profile is way too slow for "real testing", this profile is a sweet spot and helps to investigate things. Instead of `cargo run --release`, one can now run `cargo run --profile optimized-dev`. # Measurements Measurements were done using `$ [cargo clean;] time cargo build --profile release|optimized-dev` and rustc 1.89. I've used the `time`-builtin from zsh. Note that user time is much higher as we have more threads (codegen units) now. The total time is much shorter, tho. ## Clean Build Speedup of 56%. - `$ time cargo clean --release`: `109,67s user 13,64s system 211% cpu 58,343 total` - `$ time cargo clean --profile optimized-dev`: `185,41s user 14,92s system 528% cpu 37,876 total` ## Incremental Build Speedup of 153%. - `$ time cargo clean --release`: `37,58s user 1,53s system 117% cpu 33,356 total` - `$ time cargo clean --profile optimized-dev`: `47,62s user 1,71s system 373% cpu 13,220 total` --- Cargo.toml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index e68233b059..ed4c5225f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,20 @@ lto = true opt-level = "s" strip = true +# Tradeof between performance and fast compilation times for local testing and +# development with frequent rebuilds. +[profile.optimized-dev] +codegen-units = 16 +inherits = "release" +lto = false +opt-level = 2 +strip = false + +# Optimize more for dependencies: They don't require frequent rebuilds. +[profile.optimized-dev.package."*"] +codegen-units = 1 +opt-level = 3 + [profile.profiling] debug = true inherits = "release" From ff638c62f31e190d24e47496ae786e0c536820e0 Mon Sep 17 00:00:00 2001 From: Jinrong Liang Date: Mon, 14 Apr 2025 21:21:23 +0800 Subject: [PATCH 188/294] pr #7033 squashed 2025-08-18: downtime limits Current (squashed) state of: https://github.com/cloud-hypervisor/cloud-hypervisor/pull/7033/commits --- vm-migration: Add support for downtime limits Add handling of migration timeout failures to provide more flexible live migration options. Implement downtime limiting logic to minimize service disruptions. Support for setting downtime thresholds and migration timeouts. Signed-off-by: Jinrong Liang Signed-off-by: Songqian Li docs: Add migration parameters to live migration document Updated live migration documentation to include migration timeout controls and downtime limits. 
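The pre-copy loop added in `vmm/src/lib.rs` measures the transfer bandwidth of every pass and only pauses the guest once the remaining dirty data fits into the requested downtime budget at that bandwidth. A minimal, hypothetical sketch of the convergence rule (not code from this series):

```rust
use std::time::Duration;

// Keep doing pre-copy passes until the remaining dirty data can be sent
// within the requested downtime at the currently measured bandwidth
// (bytes per millisecond), then pause and send the final batch.
fn ready_for_downtime(pending_bytes: u64, bytes_per_ms: f64, downtime_limit: Duration) -> bool {
    let threshold_bytes = (bytes_per_ms * downtime_limit.as_millis() as f64) as u64;
    pending_bytes <= threshold_bytes
}

fn main() {
    // At roughly 1 GiB/s (about 1 MiB per millisecond), 200 MiB of remaining
    // dirty memory fits into a 300 ms downtime budget.
    let one_mib_per_ms = 1024.0 * 1024.0;
    assert!(ready_for_downtime(
        200 * 1024 * 1024,
        one_mib_per_ms,
        Duration::from_millis(300)
    ));
    // 4 GiB of remaining dirty memory does not.
    assert!(!ready_for_downtime(
        4 * 1024 * 1024 * 1024,
        one_mib_per_ms,
        Duration::from_millis(300)
    ));
}
```

The separate `migration-timeout` value bounds the total time spent in these passes: 0 disables the check, otherwise the migration is abandoned once the timeout expires.
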
Signed-off-by: Jinrong Liang Signed-off-by: Songqian Li tests: Add downtime and migration timeout tests Signed-off-by: Jinrong Liang Signed-off-by: Songqian Li --- docs/live_migration.md | 29 ++- src/bin/ch-remote.rs | 42 +++- tests/integration.rs | 97 ++++++++- vmm/src/api/mod.rs | 11 ++ vmm/src/api/openapi/cloud-hypervisor.yaml | 10 + vmm/src/lib.rs | 229 ++++++++++++++++++---- vmm/src/memory_manager.rs | 6 +- 7 files changed, 376 insertions(+), 48 deletions(-) diff --git a/docs/live_migration.md b/docs/live_migration.md index 94c9afc236..5c77d2625f 100644 --- a/docs/live_migration.md +++ b/docs/live_migration.md @@ -171,7 +171,13 @@ After a few seconds the VM should be up and you can interact with it. Initiate the Migration over TCP: ```console -src $ ch-remote --api-socket=/tmp/api send-migration tcp:{dst}:{port} +src $ ch-remote --api-socket=/tmp/api send-migration tcp:{dst}:{port} +``` + +With migration parameters: + +```console +src $ ch-remote --api-socket=/tmp/api send-migration tcp:{dst}:{port} --migration-timeout 60 --downtime 5000 ``` > Replace {dst}:{port} with the actual IP address and port of your destination host. @@ -180,3 +186,24 @@ After completing the above commands, the source VM will be migrated to the destination host and continue running there. The source VM instance will terminate normally. All ongoing processes and connections within the VM should remain intact after the migration. + +#### Migration Parameters + +Cloud Hypervisor supports additional parameters to control the +migration process: + +- `migration-timeout ` +Sets the maximum time (in seconds) allowed for the migration process. +If the migration takes longer than this timeout, it will be aborted. A +value of 0 means no timeout limit. +- `downtime ` +Sets the maximum acceptable downtime (in milliseconds) during the +migration. This parameter helps control the trade-off between migration +time and VM downtime. + +> The downtime limit is related to the cost of serialization +(deserialization) of vCPU and device state. Therefore, the expected +downtime is always shorter than the actual downtime. + +These parameters can be used with the `send-migration` command to +fine-tune the migration behavior according to your requirements. 
\ No newline at end of file diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs index 75b3159be1..61e30c33d3 100644 --- a/src/bin/ch-remote.rs +++ b/src/bin/ch-remote.rs @@ -479,6 +479,16 @@ fn rest_api_do_command(matches: &ArgMatches, socket: &mut UnixStream) -> ApiResu .subcommand_matches("send-migration") .unwrap() .get_flag("send_migration_local"), + *matches + .subcommand_matches("send-migration") + .unwrap() + .get_one::("downtime-ms") + .unwrap_or(&300), + *matches + .subcommand_matches("send-migration") + .unwrap() + .get_one::("migration-timeout-s") + .unwrap_or(&3600), ); simple_api_command(socket, "PUT", "send-migration", Some(&send_migration_data)) .map_err(Error::HttpApiClient) @@ -693,6 +703,16 @@ fn dbus_api_do_command(matches: &ArgMatches, proxy: &DBusApi1ProxyBlocking<'_>) .subcommand_matches("send-migration") .unwrap() .get_flag("send_migration_local"), + *matches + .subcommand_matches("send-migration") + .unwrap() + .get_one::("downtime-ms") + .unwrap_or(&300), + *matches + .subcommand_matches("send-migration") + .unwrap() + .get_one::("migration-timeout-s") + .unwrap_or(&3600), ); proxy.api_vm_send_migration(&send_migration_data) } @@ -886,10 +906,12 @@ fn receive_migration_data(url: &str) -> String { serde_json::to_string(&receive_migration_data).unwrap() } -fn send_migration_data(url: &str, local: bool) -> String { +fn send_migration_data(url: &str, local: bool, downtime: u64, migration_timeout: u64) -> String { let send_migration_data = vmm::api::VmSendMigrationData { destination_url: url.to_owned(), local, + downtime, + migration_timeout, }; serde_json::to_string(&send_migration_data).unwrap() @@ -1051,6 +1073,24 @@ fn get_cli_commands_sorted() -> Box<[Command]> { Command::new("resume").about("Resume the VM"), Command::new("send-migration") .about("Initiate a VM migration") + .arg( + Arg::new("downtime-ms") + .long("downtime-ms") + .visible_alias("downtime") + .help("Set the expected maximum downtime in milliseconds") + .num_args(1) + .value_parser(clap::value_parser!(u64)) + .default_value("300"), + ) + .arg( + Arg::new("migration-timeout-s") + .long("migration-timeout-s") + .visible_alias("migration-timeout") + .help("Set the maximum allowed migration time in seconds") + .num_args(1) + .value_parser(clap::value_parser!(u64)) + .default_value("3600"), + ) .arg( Arg::new("send_migration_config") .index(1) diff --git a/tests/integration.rs b/tests/integration.rs index e18d7fb7b1..8369560799 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -7614,7 +7614,9 @@ mod ivshmem { &migration_socket, &src_api_socket, &dest_api_socket, - local + local, + 300, + 60 ), "Unsuccessful command: 'send-migration' or 'receive-migration'." ); @@ -9778,6 +9780,8 @@ mod live_migration { src_api_socket: &str, dest_api_socket: &str, local: bool, + downtime: u64, + timeout: u64, ) -> bool { // Start to receive migration from the destination VM let mut receive_migration = Command::new(clh_command("ch-remote")) @@ -9798,6 +9802,10 @@ mod live_migration { format!("--api-socket={}", &src_api_socket), "send-migration".to_string(), format! 
{"unix:{migration_socket}"}, + "--downtime".to_string(), + format!("{downtime}"), + "--migration-timeout".to_string(), + format!("{timeout}"), ] .to_vec(); @@ -10022,8 +10030,18 @@ mod live_migration { .unwrap(), ); + let downtime = 100_000; // 100s + let migration_timeout = 1000; // 1000s + assert!( - start_live_migration(&migration_socket, &src_api_socket, &dest_api_socket, local), + start_live_migration( + &migration_socket, + &src_api_socket, + &dest_api_socket, + local, + downtime, + migration_timeout + ), "Unsuccessful command: 'send-migration' or 'receive-migration'." ); }); @@ -10196,8 +10214,18 @@ mod live_migration { .unwrap(), ); + let downtime = 100000; + let migration_timeout = 1000; + assert!( - start_live_migration(&migration_socket, &src_api_socket, &dest_api_socket, local), + start_live_migration( + &migration_socket, + &src_api_socket, + &dest_api_socket, + local, + downtime, + migration_timeout + ), "Unsuccessful command: 'send-migration' or 'receive-migration'." ); }); @@ -10414,8 +10442,18 @@ mod live_migration { .unwrap(), ); + let downtime = 100000; + let migration_timeout = 1000; + assert!( - start_live_migration(&migration_socket, &src_api_socket, &dest_api_socket, local), + start_live_migration( + &migration_socket, + &src_api_socket, + &dest_api_socket, + local, + downtime, + migration_timeout + ), "Unsuccessful command: 'send-migration' or 'receive-migration'." ); }); @@ -10630,8 +10668,18 @@ mod live_migration { .unwrap(), ); + let downtime = 100000; + let migration_timeout = 1000; + assert!( - start_live_migration(&migration_socket, &src_api_socket, &dest_api_socket, local), + start_live_migration( + &migration_socket, + &src_api_socket, + &dest_api_socket, + local, + downtime, + migration_timeout + ), "Unsuccessful command: 'send-migration' or 'receive-migration'." ); }); @@ -10740,8 +10788,18 @@ mod live_migration { .unwrap(), ); + let downtime = 100000; + let migration_timeout = 1000; + assert!( - start_live_migration(&migration_socket, &src_api_socket, &dest_api_socket, local), + start_live_migration( + &migration_socket, + &src_api_socket, + &dest_api_socket, + local, + downtime, + migration_timeout + ), "Unsuccessful command: 'send-migration' or 'receive-migration'." ); }); @@ -10887,8 +10945,18 @@ mod live_migration { .unwrap(), ); + let downtime = 100000; + let migration_timeout = 1000; + assert!( - start_live_migration(&migration_socket, &src_api_socket, &dest_api_socket, true), + start_live_migration( + &migration_socket, + &src_api_socket, + &dest_api_socket, + true, + downtime, + migration_timeout + ), "Unsuccessful command: 'send-migration' or 'receive-migration'." 
); }); @@ -10943,7 +11011,12 @@ mod live_migration { .port() } - fn start_live_migration_tcp(src_api_socket: &str, dest_api_socket: &str) -> bool { + fn start_live_migration_tcp( + src_api_socket: &str, + dest_api_socket: &str, + downtime: u64, + timeout: u64, + ) -> bool { // Get an available TCP port let migration_port = get_available_port(); let host_ip = "127.0.0.1"; @@ -10970,6 +11043,10 @@ mod live_migration { &format!("--api-socket={src_api_socket}"), "send-migration", &format!("tcp:{host_ip}:{migration_port}"), + "--downtime", + &format!("{downtime}"), + "--migration-timeout", + &format!("{timeout}"), ]) .stdin(Stdio::null()) .stderr(Stdio::piped()) @@ -11040,6 +11117,8 @@ mod live_migration { .output() .expect("Expect creating disk image to succeed"); let pmem_path = String::from("/dev/pmem0"); + let downtime = 100000; + let timeout = 1000; // Start the source VM let src_vm_path = clh_command("cloud-hypervisor"); @@ -11102,7 +11181,7 @@ mod live_migration { } // Start TCP live migration assert!( - start_live_migration_tcp(&src_api_socket, &dest_api_socket), + start_live_migration_tcp(&src_api_socket, &dest_api_socket, downtime, timeout), "Unsuccessful command: 'send-migration' or 'receive-migration'." ); }); diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index a9c075e651..9513088dff 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -260,6 +260,17 @@ pub struct VmSendMigrationData { /// Send memory across socket without copying #[serde(default)] pub local: bool, + /// Microsecond level downtime + #[serde(default = "default_downtime")] + pub downtime: u64, + /// Second level migration timeout + #[serde(default)] + pub migration_timeout: u64, +} + +// Default value for downtime the same as qemu. +fn default_downtime() -> u64 { + 300 } pub enum ApiResponsePayload { diff --git a/vmm/src/api/openapi/cloud-hypervisor.yaml b/vmm/src/api/openapi/cloud-hypervisor.yaml index e4a76f6b74..919555b2c4 100644 --- a/vmm/src/api/openapi/cloud-hypervisor.yaml +++ b/vmm/src/api/openapi/cloud-hypervisor.yaml @@ -1249,6 +1249,16 @@ components: type: string local: type: boolean + downtime: + type: integer + format: int64 + description: Maximum downtime in milliseconds during migration + default: 500 + migration_timeout: + type: integer + format: int64 + description: Total timeout for migration in milliseconds (0 = no limit) + default: 0 VmAddUserDevice: required: diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index a811c5a4b7..df8ddfb774 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -20,13 +20,14 @@ use std::rc::Rc; use std::sync::mpsc::{Receiver, RecvError, SendError, Sender}; use std::sync::{Arc, Mutex}; #[cfg(not(target_arch = "riscv64"))] -use std::time::Instant; +use std::time::{Duration, Instant}; use std::{io, result, thread}; use anyhow::anyhow; #[cfg(feature = "dbus_api")] use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; use api::http::HttpApiHandle; +use arch::PAGE_SIZE; #[cfg(all(feature = "kvm", target_arch = "x86_64"))] use arch::x86_64::MAX_SUPPORTED_CPUS_LEGACY; use console_devices::{ConsoleInfo, pre_create_console_devices}; @@ -646,6 +647,45 @@ impl VmmVersionInfo { } } +#[derive(Debug, Clone)] +struct MigrationState { + current_dirty_pages: u64, + downtime: Duration, + downtime_start: Instant, + iteration: u64, + iteration_cost_time: Duration, + iteration_start_time: Instant, + mb_per_sec: f64, + pages_per_second: u64, + pending_size: u64, + start_time: Instant, + threshold_size: u64, + total_time: Duration, + total_transferred_bytes: u64, + 
total_transferred_dirty_pages: u64, +} + +impl MigrationState { + pub fn new() -> Self { + Self { + current_dirty_pages: 0, + downtime: Duration::default(), + downtime_start: Instant::now(), + iteration: 0, + iteration_cost_time: Duration::default(), + iteration_start_time: Instant::now(), + mb_per_sec: 0.0, + pages_per_second: 0, + pending_size: 0, + start_time: Instant::now(), + threshold_size: 0, + total_time: Duration::default(), + total_transferred_bytes: 0, + total_transferred_dirty_pages: 0, + } + } +} + pub struct VmmThreadHandle { pub thread_handle: thread::JoinHandle>, #[cfg(feature = "dbus_api")] @@ -1084,10 +1124,8 @@ impl Vmm { fn vm_maybe_send_dirty_pages( vm: &mut Vm, socket: &mut SocketStream, + table: MemoryRangeTable, ) -> result::Result { - // Send (dirty) memory table - let table = vm.dirty_log()?; - // But if there are no regions go straight to pause if table.regions().is_empty() { return Ok(false); @@ -1105,6 +1143,150 @@ impl Vmm { Ok(true) } + fn memory_copy_iterations( + vm: &mut Vm, + socket: &mut SocketStream, + s: &mut MigrationState, + migration_timeout: Duration, + migrate_downtime_limit: Duration, + ) -> result::Result { + let mut bandwidth = 0.0; + let mut iteration_table; + + loop { + // Update the start time of the iteration + s.iteration_start_time = Instant::now(); + + // Increment iteration counter + s.iteration += 1; + + // Check if migration has timed out + // migration_timeout > 0 means enabling the timeout check, 0 means disabling the timeout check + if !migration_timeout.is_zero() && s.start_time.elapsed() > migration_timeout { + warn!("Migration timed out after {:?}", migration_timeout); + Request::abandon().write_to(socket)?; + Response::read_from(socket)?.ok_or_abandon( + socket, + MigratableError::MigrateSend(anyhow!("Migration timed out")), + )?; + } + + // Get the dirty page table + iteration_table = vm.dirty_log()?; + + // Update the pending size (amount of data to transfer) + s.pending_size = iteration_table + .regions() + .iter() + .map(|range| range.length) + .sum(); + + // Update thresholds + if bandwidth > 0.0 { + s.threshold_size = bandwidth as u64 * migrate_downtime_limit.as_millis() as u64; + } + + // Enter the final stage of migration when the suspension conditions are met + if s.iteration > 1 && s.pending_size <= s.threshold_size { + break; + } + + // Update the number of dirty pages + s.total_transferred_bytes += s.pending_size; + s.current_dirty_pages = s.pending_size.div_ceil(PAGE_SIZE as u64); + s.total_transferred_dirty_pages += s.current_dirty_pages; + + // Send the current dirty pages + let transfer_start = Instant::now(); + Self::vm_maybe_send_dirty_pages(vm, socket, iteration_table.clone())?; + let transfer_time = transfer_start.elapsed().as_millis() as f64; + + // Update bandwidth + if transfer_time > 0.0 && s.pending_size > 0 { + bandwidth = s.pending_size as f64 / transfer_time; + // Convert bandwidth to MB/s + s.mb_per_sec = (bandwidth * 1000.0) / (1024.0 * 1024.0); + } + + // Update iteration cost time + s.iteration_cost_time = s.iteration_start_time.elapsed(); + if s.iteration_cost_time.as_millis() > 0 { + s.pages_per_second = + s.current_dirty_pages * 1000 / s.iteration_cost_time.as_millis() as u64; + } + } + + Ok(iteration_table) + } + + fn do_memory_migration( + vm: &mut Vm, + socket: &mut SocketStream, + s: &mut MigrationState, + send_data_migration: &VmSendMigrationData, + ) -> result::Result<(), MigratableError> { + // Start logging dirty pages + vm.start_dirty_log()?; + + // Send memory table + let table = 
vm.memory_range_table()?; + Request::memory(table.length()).write_to(socket).unwrap(); + table.write_to(socket)?; + // And then the memory itself + vm.send_memory_regions(&table, socket)?; + Response::read_from(socket)?.ok_or_abandon( + socket, + MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), + )?; + + // Define the maximum allowed downtime 2000 seconds(2000000 milliseconds) + const MAX_MIGRATE_DOWNTIME: u64 = 2000000; + + // Verify that downtime must be between 1 and MAX_MIGRATE_DOWNTIME + if send_data_migration.downtime == 0 || send_data_migration.downtime > MAX_MIGRATE_DOWNTIME + { + return Err(MigratableError::MigrateSend(anyhow!( + "downtime_limit must be an integer in the range of 1 to {} ms", + MAX_MIGRATE_DOWNTIME + ))); + } + + let migration_timeout = Duration::from_secs(send_data_migration.migration_timeout); + let migrate_downtime_limit = Duration::from_millis(send_data_migration.downtime); + + // Verify that downtime must be less than the migration timeout + if !migration_timeout.is_zero() && migrate_downtime_limit >= migration_timeout { + return Err(MigratableError::MigrateSend(anyhow!( + "downtime_limit {}ms must be less than migration_timeout {}ms", + send_data_migration.downtime, + send_data_migration.migration_timeout * 1000 + ))); + } + + let iteration_table = + Self::memory_copy_iterations(vm, socket, s, migration_timeout, migrate_downtime_limit)?; + + info!("Entering downtime phase"); + s.downtime_start = Instant::now(); + vm.pause()?; + + // Send last batch of dirty pages + let mut final_table = vm.dirty_log()?; + final_table.extend(iteration_table.clone()); + Self::vm_maybe_send_dirty_pages(vm, socket, final_table.clone())?; + + // Update statistics + s.pending_size = final_table.regions().iter().map(|range| range.length).sum(); + s.total_transferred_bytes += s.pending_size; + s.current_dirty_pages = s.pending_size.div_ceil(PAGE_SIZE as u64); + s.total_transferred_dirty_pages += s.current_dirty_pages; + + // Stop logging dirty pages + vm.stop_dirty_log()?; + + Ok(()) + } + fn send_migration( vm: &mut Vm, #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc< @@ -1112,6 +1294,8 @@ impl Vmm { >, send_data_migration: VmSendMigrationData, ) -> result::Result<(), MigratableError> { + let mut s = MigrationState::new(); + // Set up the socket connection let mut socket = Self::send_migration_socket(&send_data_migration.destination_url)?; @@ -1188,36 +1372,7 @@ impl Vmm { // Now pause VM vm.pause()?; } else { - // Start logging dirty pages - vm.start_dirty_log()?; - - // Send memory table - let table = vm.memory_range_table()?; - Request::memory(table.length()) - .write_to(&mut socket) - .unwrap(); - table.write_to(&mut socket)?; - // And then the memory itself - vm.send_memory_regions(&table, &mut socket)?; - Response::read_from(&mut socket)?.ok_or_abandon( - &mut socket, - MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), - )?; - - // Try at most 5 passes of dirty memory sending - const MAX_DIRTY_MIGRATIONS: usize = 5; - for i in 0..MAX_DIRTY_MIGRATIONS { - info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS); - if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? { - break; - } - } - - // Now pause VM - vm.pause()?; - - // Send last batch of dirty pages - Self::vm_maybe_send_dirty_pages(vm, &mut socket)?; + Self::do_memory_migration(vm, &mut socket, &mut s, &send_data_migration)?; } // We release the locks early to enable locking them on the destination host. 
@@ -1244,11 +1399,17 @@ impl Vmm { MigratableError::MigrateSend(anyhow!("Error completing migration")), )?; + // Record downtime + s.downtime = s.downtime_start.elapsed(); + // Stop logging dirty pages if !send_data_migration.local { vm.stop_dirty_log()?; } + // Record total migration time + s.total_time = s.start_time.elapsed(); + info!("Migration complete"); // Let every Migratable object know about the migration being complete diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index 74765cf514..ff040a043e 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -2624,11 +2624,11 @@ impl Migratable for MemoryManager { let sub_table = MemoryRangeTable::from_bitmap(dirty_bitmap, r.gpa, 4096); if sub_table.regions().is_empty() { - info!("Dirty Memory Range Table is empty"); + debug!("Dirty Memory Range Table is empty"); } else { - info!("Dirty Memory Range Table:"); + debug!("Dirty Memory Range Table:"); for range in sub_table.regions() { - info!("GPA: {:x} size: {} (KiB)", range.gpa, range.length / 1024); + trace!("GPA: {:x} size: {} (KiB)", range.gpa, range.length / 1024); } } From a7d90b8d66fa288367e1221d3435ec3f73a11618 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Mon, 1 Sep 2025 18:57:48 +0200 Subject: [PATCH 189/294] vmm: remove noop The vCPU lifecycle is already complicated. Let's remove dead code (impl is a no-op and there also will not be sensible code in the near future. --- vmm/src/cpu.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index 43edf5015b..ea7d350429 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -2317,10 +2317,9 @@ impl Pausable for CpuManager { self.signal_vcpus(); + #[cfg(all(feature = "kvm", target_arch = "x86_64"))] for vcpu in self.vcpus.iter() { - let mut vcpu = vcpu.lock().unwrap(); - vcpu.pause()?; - #[cfg(all(feature = "kvm", target_arch = "x86_64"))] + let vcpu = vcpu.lock().unwrap(); if !self.config.kvm_hyperv { vcpu.vcpu.notify_guest_clock_paused().map_err(|e| { MigratableError::Pause(anyhow!( @@ -2346,10 +2345,6 @@ impl Pausable for CpuManager { } fn resume(&mut self) -> std::result::Result<(), MigratableError> { - for vcpu in self.vcpus.iter() { - vcpu.lock().unwrap().resume()?; - } - // Toggle the vCPUs pause boolean self.vcpus_pause_signalled.store(false, Ordering::SeqCst); From 5403ac80ced46cb1c4f0f49413ae7b96cfeb4e39 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Mon, 1 Sep 2025 18:55:57 +0200 Subject: [PATCH 190/294] vmm: fix CpuManager::resume(): gracefully wait for run vCPU loop ACK It is odd that for pause(), the CpuManager waited via `state.paused` for the vCPU thread to ACK the state change but not for `resume()`. In the `resume()` case, oddly CpuManager "owned" the state change in `state.paused`. This commit changes this so that the vCPU ACKs its state change from pause->run is also gracefully recognized by `CpuManager::resume()`. This change ensures proper synchronization and prevents situations in that park() follows right after unpark(), causing deadlocks and other weird behavior due to race conditions. Calling resume() now takes slightly longer, very similar to pause(). This is, however, even for 254 vCPUs in the range of less than 10ms. ## Reproducer `ch-remote --api-socket ... 
pause` ```patch diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index d7bba25cc..35557d58f 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -2687,6 +2687,10 @@ impl Pausable for Vm { MigratableError::Pause(anyhow!("Error activating pending virtio devices: {:?}", e)) })?; + for _ in 0..1000 { + self.cpu_manager.lock().unwrap().pause()?; + self.cpu_manager.lock().unwrap().resume()?; + } self.cpu_manager.lock().unwrap().pause()?; self.device_manager.lock().unwrap().pause()?; ``` Since [0] is merged, this fix can be tested for example by modifying the pause() API call to run pause() and resume() in a loop a thousand times. With this change, things do not get stuck anymore. ## Outlook Decades of experience in VMM development showed us that using many AtomicBools is a footgun. They are not synchronized with each other at all. On the long term, we might want to refactor things to have a single AtomicU64 with different bits having different meanings. [0] https://github.com/cloud-hypervisor/cloud-hypervisor/pull/7290 --- vmm/src/cpu.rs | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index ea7d350429..6a72e3d56b 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -633,6 +633,7 @@ struct VcpuState { handle: Option>, kill: Arc, vcpu_run_interrupted: Arc, + /// Used to ACK state changes from the run vCPU loop to the CPU Manager. paused: Arc, } @@ -1123,6 +1124,7 @@ impl CpuManager { while vcpu_pause_signalled.load(Ordering::SeqCst) { thread::park(); } + vcpu_paused.store(false, Ordering::SeqCst); vcpu_run_interrupted.store(false, Ordering::SeqCst); } @@ -2334,6 +2336,7 @@ impl Pausable for CpuManager { // activated vCPU change their state to ensure they have parked. for state in self.vcpu_states.iter() { if state.active() { + // wait for vCPU to update state while !state.paused.load(Ordering::SeqCst) { // To avoid a priority inversion with the vCPU thread thread::sleep(std::time::Duration::from_millis(1)); @@ -2345,16 +2348,26 @@ impl Pausable for CpuManager { } fn resume(&mut self) -> std::result::Result<(), MigratableError> { - // Toggle the vCPUs pause boolean + // Ensure that vCPUs keep running after being unpark() in + // their run vCPU loop. self.vcpus_pause_signalled.store(false, Ordering::SeqCst); - // Unpark all the VCPU threads. - // Once unparked, the next thing they will do is checking for the pause - // boolean. Since it'll be set to false, they will exit their pause loop - // and go back to vmx root. - for state in self.vcpu_states.iter() { - state.paused.store(false, Ordering::SeqCst); - state.unpark_thread(); + // Unpark all the vCPU threads. + // Step 1/2: signal each thread + { + for state in self.vcpu_states.iter() { + state.unpark_thread(); + } + } + // Step 2/2: wait for state ACK + { + for state in self.vcpu_states.iter() { + // wait for vCPU to update state + while state.paused.load(Ordering::SeqCst) { + // To avoid a priority inversion with the vCPU thread + thread::sleep(std::time::Duration::from_millis(1)); + } + } } Ok(()) } From fb58892c1fc2cf4f60da120b8e385f3f6ed6243c Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Mon, 1 Sep 2025 19:32:08 +0200 Subject: [PATCH 191/294] vmm: refactor poor naming This makes things much clearer. Less cognitive load. 
--- vmm/src/cpu.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index 6a72e3d56b..ceec800f90 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -997,9 +997,9 @@ impl CpuManager { #[cfg(feature = "guest_debug")] let vm_debug_evt = self.vm_debug_evt.try_clone().unwrap(); let panic_exit_evt = self.exit_evt.try_clone().unwrap(); - let vcpu_kill_signalled = self.vcpus_kill_signalled.clone(); - let vcpu_pause_signalled = self.vcpus_pause_signalled.clone(); - let vcpu_kick_signalled = self.vcpus_kick_signalled.clone(); + let vcpus_kill_signalled_clone = self.vcpus_kill_signalled.clone(); + let vcpus_pause_signalled_clone = self.vcpus_pause_signalled.clone(); + let vcpus_kick_signalled_clone = self.vcpus_kick_signalled.clone(); let vcpu_kill = self.vcpu_states[usize::try_from(vcpu_id).unwrap()] .kill @@ -1092,7 +1092,7 @@ impl CpuManager { // loads and stores to different atomics and we need // to see them in a consistent order in all threads - if vcpu_pause_signalled.load(Ordering::SeqCst) { + if vcpus_pause_signalled_clone.load(Ordering::SeqCst) { // As a pause can be caused by PIO & MMIO exits then we need to ensure they are // completed by returning to KVM_RUN. From the kernel docs: // @@ -1121,14 +1121,14 @@ impl CpuManager { vcpu_run_interrupted.store(true, Ordering::SeqCst); vcpu_paused.store(true, Ordering::SeqCst); - while vcpu_pause_signalled.load(Ordering::SeqCst) { + while vcpus_pause_signalled_clone.load(Ordering::SeqCst) { thread::park(); } vcpu_paused.store(false, Ordering::SeqCst); vcpu_run_interrupted.store(false, Ordering::SeqCst); } - if vcpu_kick_signalled.load(Ordering::SeqCst) { + if vcpus_kick_signalled_clone.load(Ordering::SeqCst) { vcpu_run_interrupted.store(true, Ordering::SeqCst); #[cfg(target_arch = "x86_64")] match vcpu.lock().as_ref().unwrap().vcpu.nmi() { @@ -1141,7 +1141,7 @@ impl CpuManager { } // We've been told to terminate - if vcpu_kill_signalled.load(Ordering::SeqCst) + if vcpus_kill_signalled_clone.load(Ordering::SeqCst) || vcpu_kill.load(Ordering::SeqCst) { vcpu_run_interrupted.store(true, Ordering::SeqCst); @@ -1160,7 +1160,7 @@ impl CpuManager { info!("VmExit::Debug"); #[cfg(feature = "guest_debug")] { - vcpu_pause_signalled.store(true, Ordering::SeqCst); + vcpus_pause_signalled_clone.store(true, Ordering::SeqCst); let raw_tid = get_raw_tid(vcpu_id as usize); vm_debug_evt.write(raw_tid as u64).unwrap(); } @@ -1221,7 +1221,7 @@ impl CpuManager { } // We've been told to terminate - if vcpu_kill_signalled.load(Ordering::SeqCst) + if vcpus_kill_signalled_clone.load(Ordering::SeqCst) || vcpu_kill.load(Ordering::SeqCst) { vcpu_run_interrupted.store(true, Ordering::SeqCst); From 092d1c26704f6dfe9609bc9d5f4836aecf0ccd42 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Wed, 10 Sep 2025 16:46:51 +0200 Subject: [PATCH 192/294] vmm: properly unset immediate_exit on -EINTR Also see [0] for more info. [0] https://docs.kernel.org/virt/kvm/api.html#the-kvm-run-structure --- hypervisor/src/kvm/mod.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 6becd0e411..5047869dd5 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -2029,7 +2029,8 @@ impl cpu::Vcpu for KvmVcpu { /// Triggers the running of the current virtual CPU returning an exit reason. 
/// fn run(&self) -> std::result::Result { - match self.fd.lock().unwrap().run() { + let mut lock = self.fd.lock().unwrap(); + match lock.run() { Ok(run) => match run { #[cfg(target_arch = "x86_64")] VcpuExit::IoIn(addr, data) => { @@ -2108,7 +2109,11 @@ impl cpu::Vcpu for KvmVcpu { }, Err(ref e) => match e.errno() { - libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore), + libc::EINTR => { + lock.set_kvm_immediate_exit(0); + Ok(cpu::VmExit::Ignore) + } + libc::EAGAIN => Ok(cpu::VmExit::Ignore), _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( "VCPU error {:?}", e From 894aef17676d2a3572839fbf5a5101a550bef552 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Thu, 11 Sep 2025 09:30:55 +0200 Subject: [PATCH 193/294] vmm: optimize lock usage No need to grab the lock multiple times in this short period of time. The lock is anyway held for the duration of the long operation (KVM_RUN). --- vmm/src/cpu.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index ceec800f90..192a2634fb 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -1110,12 +1110,14 @@ impl CpuManager { #[cfg(feature = "kvm")] if matches!(hypervisor_type, HypervisorType::Kvm) { - vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(true); - if !matches!(vcpu.lock().unwrap().run(), Ok(VmExit::Ignore)) { + let lock = vcpu.lock(); + let lock = lock.as_ref().unwrap(); + lock.vcpu.set_immediate_exit(true); + if !matches!(lock.run(), Ok(VmExit::Ignore)) { error!("Unexpected VM exit on \"immediate_exit\" run"); break; } - vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(false); + lock.vcpu.set_immediate_exit(false); } vcpu_run_interrupted.store(true, Ordering::SeqCst); From 744debfcc79e6274da2e5432653b4cf46e23856e Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Wed, 10 Sep 2025 16:46:14 +0200 Subject: [PATCH 194/294] vmm: prerequisites for accessing KVM_RUN in the vCPU loop These are the prerequisites for the upcoming (quick and dirty) solution to the problem that we might miss some events. --- hypervisor/src/cpu.rs | 9 +++++++++ hypervisor/src/kvm/mod.rs | 13 +++++++++---- hypervisor/src/mshv/mod.rs | 7 +++++++ vmm/src/cpu.rs | 9 +++++++++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/hypervisor/src/cpu.rs b/hypervisor/src/cpu.rs index 5c377c5d1d..e138cb745b 100644 --- a/hypervisor/src/cpu.rs +++ b/hypervisor/src/cpu.rs @@ -10,6 +10,8 @@ // // +#[cfg(feature = "kvm")] +use std::os::fd::RawFd; #[cfg(target_arch = "aarch64")] use std::sync::Arc; @@ -602,4 +604,11 @@ pub trait Vcpu: Send + Sync { /// Trigger NMI interrupt /// fn nmi(&self) -> Result<()>; + /// Returns the underlying vCPU FD of KVM. + /// + /// # SAFETY + /// This is safe as we only use this to map the KVM_RUN structure for the + /// signal handler and only use it from there. 
+ #[cfg(feature = "kvm")] + unsafe fn get_kvm_vcpu_raw_fd(&self) -> RawFd; } diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 5047869dd5..961b62810a 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -14,10 +14,8 @@ use std::any::Any; use std::collections::HashMap; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use std::mem::offset_of; -#[cfg(feature = "tdx")] -use std::os::unix::io::AsRawFd; -#[cfg(feature = "tdx")] -use std::os::unix::io::RawFd; +#[cfg(any(feature = "tdx", feature = "kvm"))] +use std::os::unix::io::{AsRawFd, RawFd}; use std::result; #[cfg(target_arch = "x86_64")] use std::sync::atomic::{AtomicBool, Ordering}; @@ -2816,6 +2814,13 @@ impl cpu::Vcpu for KvmVcpu { self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into()); } + #[cfg(feature = "kvm")] + unsafe fn get_kvm_vcpu_raw_fd(&self) -> RawFd { + let kvm_vcpu = self.fd.lock().unwrap(); + let kvm_vcpu = &*kvm_vcpu; + kvm_vcpu.as_raw_fd() + } + /// /// Returns the details about TDX exit reason /// diff --git a/hypervisor/src/mshv/mod.rs b/hypervisor/src/mshv/mod.rs index bc03c44dab..40bdd81e93 100644 --- a/hypervisor/src/mshv/mod.rs +++ b/hypervisor/src/mshv/mod.rs @@ -41,6 +41,8 @@ pub mod x86_64; // aarch64 dependencies #[cfg(target_arch = "aarch64")] pub mod aarch64; +#[cfg(feature = "kvm")] +use std::os::fd::RawFd; use std::os::unix::io::AsRawFd; #[cfg(target_arch = "aarch64")] use std::sync::Mutex; @@ -1574,6 +1576,11 @@ impl cpu::Vcpu for MshvVcpu { Ok(()) } + + #[cfg(feature = "kvm")] + unsafe fn get_kvm_vcpu_raw_fd(&self) -> RawFd { + todo!() + } } impl MshvVcpu { diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index 192a2634fb..b293943ccb 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -16,6 +16,8 @@ use std::collections::BTreeMap; use std::io::Write; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use std::mem::size_of; +#[cfg(feature = "kvm")] +use std::os::fd::RawFd; use std::os::unix::thread::JoinHandleExt; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Barrier, Mutex}; @@ -485,6 +487,13 @@ impl Vcpu { .map_err(Error::VcpuSetGicrBaseAddr)?; Ok(()) } + + #[cfg(feature = "kvm")] + pub fn get_kvm_vcpu_raw_fd(&self) -> RawFd { + // SAFETY: We happen to know that all current uses respect the safety contract. + // TODO find a better way to keep this safe and/or express its fragile state. + unsafe { self.vcpu.get_kvm_vcpu_raw_fd() } + } } impl Pausable for Vcpu {} From 1d012190bea57e5e018cd6077b49ff1d328fbe34 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Wed, 10 Sep 2025 16:47:06 +0200 Subject: [PATCH 195/294] vmm: fix kicking vCPU out of KVM_RUN from signal handler A common scenario for a VMM to regain control over the vCPU thread from the hypervisor is to interrupt the vCPU. A use-case might be the `pause` API call of CHV. VMMs using KVM as hypervisor must use signals for this interception, i.e., a thread sends a signal to the vCPU thread. Sending and handling these signals is inherently racy because the signal sender does not know if the receiving thread is currently in the RUN_VCPU [0] call, or executing userspace VMM code. If we are in kernel space in KVM_RUN, things are easy as KVM just exits with -EINTR. For user-space this is more complicated. For example, it might happen that we receive a signal but the vCPU thread was about to go into the KVM_RUN system call as next instruction. There is no more opportunity to check for any pending signal flag or similar. 
KVM offers the `immediate_exit` flag [1] as part of the KVM_RUN structure for that. The signal handler of a vCPU is supposed to set this flag, to ensure that we do not miss any events. If the flag is set, KVM_RUN will exit immediately [2]. We will miss signals to the vCPU if the vCPU thread is in userspace VMM code and we do not use the `immediate_exit` flag. We must have access to the KVM_RUN data structure when the signal handler executes in a vCPU thread's context and set the `immediate_exit` [1] flag. This way, the next invocation of KVM_RUN exits immediately and the userspace VMM code can do the normal event handling. We must not use any shared locks between the normal vCPU thread VMM code and the signal handler, as otherwise we might end up in deadlocks. The signal handler therefore needs its dedicated mutable version of KVM_RUN. This commit introduces a (very hacky but good enough for a PoC) solution to this problem. [0] https://docs.kernel.org/virt/kvm/api.html#kvm-run [1] https://docs.kernel.org/virt/kvm/api.html#the-kvm-run-structure [2] https://elixir.bootlin.com/linux/v6.12/source/arch/x86/kvm/x86.c#L11566 --- Cargo.lock | 1 + hypervisor/src/mshv/mod.rs | 2 +- vmm/Cargo.toml | 1 + vmm/src/cpu.rs | 64 ++++++++++++++++++++++++++++++++++++-- vmm/src/lib.rs | 10 ++++++ 5 files changed, 75 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 14d441e388..ed015d1ce7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2425,6 +2425,7 @@ dependencies = [ "hypervisor", "igvm", "igvm_defs", + "kvm-bindings", "landlock", "libc", "linux-loader", diff --git a/hypervisor/src/mshv/mod.rs b/hypervisor/src/mshv/mod.rs index 40bdd81e93..294dee2e7d 100644 --- a/hypervisor/src/mshv/mod.rs +++ b/hypervisor/src/mshv/mod.rs @@ -1579,7 +1579,7 @@ impl cpu::Vcpu for MshvVcpu { #[cfg(feature = "kvm")] unsafe fn get_kvm_vcpu_raw_fd(&self) -> RawFd { - todo!() + unimplemented!() } } diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 25683e83ff..f6d5e9cb8b 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -54,6 +54,7 @@ hex = { version = "0.4.3", optional = true } hypervisor = { path = "../hypervisor" } igvm = { workspace = true, optional = true } igvm_defs = { workspace = true, optional = true } +kvm-bindings = { workspace = true } landlock = "0.4.2" libc = { workspace = true } linux-loader = { workspace = true, features = ["bzimage", "elf", "pe"] } diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index b293943ccb..d34b151ddd 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -16,8 +16,6 @@ use std::collections::BTreeMap; use std::io::Write; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use std::mem::size_of; -#[cfg(feature = "kvm")] -use std::os::fd::RawFd; use std::os::unix::thread::JoinHandleExt; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Barrier, Mutex}; @@ -75,6 +73,8 @@ use vm_migration::{ use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::signal::{SIGRTMIN, register_signal_handler}; use zerocopy::{FromBytes, Immutable, IntoBytes}; +#[cfg(feature = "kvm")] +use {kvm_bindings::kvm_run, std::cell::Cell, std::os::fd::RawFd, std::sync::RwLock}; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use crate::coredump::{ @@ -90,6 +90,16 @@ use crate::vm::physical_bits; use crate::vm_config::CpusConfig; use crate::{CPU_MANAGER_SNAPSHOT_ID, GuestMemoryMmap}; +#[cfg(feature = "kvm")] +thread_local! 
{ + static KVM_RUN: Cell<*mut kvm_run> = const {Cell::new(core::ptr::null_mut())}; +} +#[cfg(feature = "kvm")] +/// Tell signal handler to not access certain stuff anymore during shutdown. +/// Otherwise => panics. +/// Better alternative would be to prevent signals there at all. +pub static IS_IN_SHUTDOWN: RwLock = RwLock::new(false); + #[cfg(all(target_arch = "aarch64", feature = "guest_debug"))] /// Extract the specified bits of a 64-bit integer. /// For example, to extrace 2 bits from offset 1 (zero based) of `6u64`, @@ -1051,6 +1061,28 @@ impl CpuManager { thread::Builder::new() .name(format!("vcpu{vcpu_id}")) .spawn(move || { + // init thread-local kvm_run structure + #[cfg(feature = "kvm")] + { + let raw_kvm_fd = vcpu.lock().unwrap().get_kvm_vcpu_raw_fd(); + + // SAFETY: We know the FD is valid and have the proper args. + let buffer = unsafe { + libc::mmap( + core::ptr::null_mut(), + 4096, + libc::PROT_WRITE | libc::PROT_READ, + libc::MAP_SHARED, + raw_kvm_fd, + 0, + ) + }; + assert!(!buffer.is_null()); + assert_ne!(buffer, libc::MAP_FAILED); + let kvm_run = buffer.cast::(); + KVM_RUN.set(kvm_run); + } + // Schedule the thread to run on the expected CPU set if let Some(cpuset) = cpuset.as_ref() { // SAFETY: FFI call with correct arguments @@ -1080,7 +1112,35 @@ impl CpuManager { return; } + #[cfg(not(feature = "kvm"))] extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {} + #[cfg(feature = "kvm")] + extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) { + // We do not need a self-pipe for safe UNIX signal handling here as in this + // signal handler, we only expect the same signal over and over again. While + // different signals can interrupt a signal being handled, the same signal + // again can't by default. Therefore, this is safe. + + // This lock prevents accessing thread locals when a signal is received + // in the teardown phase of the Rust standard library. Otherwise, we would + // panic. + // + // Masking signals would be a nicer approach but this is the pragmatic + // solution. + // + // We don't have lock contention in normal operation. When the writer + // sets the bool to true, the lock is only held for a couple of µs. + let lock = IS_IN_SHUTDOWN.read().unwrap(); + if *lock { + return; + } + + let kvm_run = KVM_RUN.get(); + // SAFETY: the mapping is valid + let kvm_run = unsafe { + kvm_run.as_mut().expect("kvm_run should have been mapped as part of vCPU setup") }; + kvm_run.immediate_exit = 1; + } // This uses an async signal safe handler to kill the vcpu handles. register_signal_handler(SIGRTMIN(), handle_signal) .expect("Failed to register vcpu signal handler"); diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index df8ddfb774..29266f6d34 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -56,6 +56,8 @@ use crate::api::{ use crate::config::{RestoreConfig, add_to_config}; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use crate::coredump::GuestDebuggable; +#[cfg(feature = "kvm")] +use crate::cpu::IS_IN_SHUTDOWN; use crate::landlock::Landlock; use crate::memory_manager::MemoryManager; #[cfg(all(feature = "kvm", target_arch = "x86_64"))] @@ -1380,6 +1382,14 @@ impl Vmm { vm.release_disk_locks() .map_err(|e| MigratableError::UnlockError(anyhow!("{e}")))?; + #[cfg(feature = "kvm")] + // Prevent signal handler to access thread local storage when signals are received + // close to the end when thread-local storage is already destroyed. 
+ { + let mut lock = IS_IN_SHUTDOWN.write().unwrap(); + *lock = true; + } + // Capture snapshot and send it let vm_snapshot = vm.snapshot()?; let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap(); From 2c7efb27f0f53841d01723cb820f387c7b086ee6 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Thu, 11 Sep 2025 08:48:41 +0200 Subject: [PATCH 196/294] vmm: temporarily make "resize" API call fail fast --- vmm/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 29266f6d34..980f098c88 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -1989,6 +1989,10 @@ impl RequestHandler for Vmm { ) -> result::Result<(), VmError> { self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; + if desired_vcpus.is_some() { + todo!("doesn't work currently with our thread-local KVM_RUN approach"); + } + if let Some(ref mut vm) = self.vm { if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { error!("Error when resizing VM: {:?}", e); From 1d0d272da742fec0e1ef0129a1def829c68d2195 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 10 Jun 2025 11:38:41 +0200 Subject: [PATCH 197/294] vm-migration: add vCPU throttling (auto-converge) for pre-copy auto-converge (vCPU throttling) is a technique combined with precopy live-migration flows to migrate VMs with a high dirty rate (high working set with many writes). It is an alternative to postcopy migration, which is not yet implemented in Cloud Hypervisor. By throttling the vCPUs incrementally, the dirty rate drops and the VM migrates (converges) eventually. More specifically, the reduced dirty rate ensures that the configured downtime can be reached. The implementation is inspired by QEMU, but adapted to Cloud Hypervisor. Various discussions, intermediate steps, and experiments lead to this final result. vCPU throttling was implemented with a dedicated thread and a manager for that thread. This thread utilizes the CpuManager's pause() and resume() in conjunction with (interruptible) sleeps to apply the current throttling percentage onto the vCPUs, thus the VM. The implementation is designed to not block or delay normal operation any longer than necessary. The proposed design relies on the recent improvements and fixes for CpuManager's pause() and resume(). For correctness, on each pause/resume cycle, the time for these actions is measured. This way, a dynamic timeslice can be used, guaranteeing the VM is indeed throttled at the indented percentage. Although not supported yet by Cloud Hypervisor, this thread will support throttling cancellation when live-migrations are cancelled. This was intensively tested in an automated setup with thousands of live-migrations with VMs under load. - auto-converging starts always after two memory delta transfer iterations - every two iterations, it is increased (step size is 10%) - maximum throttling is 99% - the VM will get slower. At 99% throttling, it will be unsurprisingly barely usable. This is something users have to accept if they want to migrate their VMs running heavy workloads. 
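To make the schedule above concrete, here is a minimal, self-contained sketch (not the code added by this patch; the helper name is made up and the constants simply restate the bullet points) of how the throttle percentage evolves across the memory-copy iterations:

```rust
/// Illustrative helper only: the throttle percentage this schedule would
/// apply before a given memory-copy iteration (0-based), assuming throttling
/// starts after two iterations, rises by 10 % every second iteration and
/// saturates at 99 %.
fn throttle_for_iteration(iteration: u64) -> u8 {
    const DELAY_ITERATIONS: u64 = 2; // no throttling for the first two passes
    const INCREASE_EVERY: u64 = 2;   // bump the throttle every two iterations
    const STEP_PERCENT: u64 = 10;    // step size in percent
    const MAX_PERCENT: u64 = 99;     // upper bound

    if iteration < DELAY_ITERATIONS {
        return 0;
    }
    let steps = 1 + (iteration - DELAY_ITERATIONS) / INCREASE_EVERY;
    (steps * STEP_PERCENT).min(MAX_PERCENT) as u8
}

fn main() {
    // Iterations 0 and 1 run unthrottled; from iteration 2 on the throttle
    // rises by 10 % every second iteration until it saturates at 99 %.
    for i in 0..24 {
        println!("iteration {i}: {} %", throttle_for_iteration(i));
    }
}
```

With a high dirty rate, this is what eventually shrinks the per-iteration delta below the downtime-derived threshold so the final switch-over can happen.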
Signed-off-by: Philipp Schuster Reviewed-by: Stefan Kober Reviewed-by: Oliver Anderson Reviewed-by: Thomas Prescher On-behalf-of: SAP philipp.schuster@sap.com --- vmm/src/lib.rs | 43 +++ vmm/src/vcpu_throttling.rs | 604 +++++++++++++++++++++++++++++++++++++ vmm/src/vm.rs | 32 ++ 3 files changed, 679 insertions(+) create mode 100644 vmm/src/vcpu_throttling.rs diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 980f098c88..19f5a5cc06 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -8,6 +8,15 @@ extern crate event_monitor; #[macro_use] extern crate log; +/// Amount of iterations before auto-converging starts. +const AUTO_CONVERGE_ITERATION_DELAY: u64 = 2; +/// Step size in percent to increase the vCPU throttling. +const AUTO_CONVERGE_STEP_SIZE: u8 = 10; +/// Amount of iterations after that we increase vCPU throttling. +const AUTO_CONVERGE_ITERATION_INCREASE: u64 = 2; +/// Maximum vCPU throttling value. +const AUTO_CONVERGE_MAX: u8 = 99; + use std::collections::HashMap; use std::fs::File; use std::io::{Read, Write, stdout}; @@ -93,6 +102,7 @@ mod pci_segment; pub mod seccomp_filters; mod serial_manager; mod sigwinch_listener; +mod vcpu_throttling; pub mod vm; pub mod vm_config; @@ -1145,6 +1155,15 @@ impl Vmm { Ok(true) } + fn can_increase_autoconverge_step(s: &MigrationState) -> bool { + if s.iteration < AUTO_CONVERGE_ITERATION_DELAY { + false + } else { + let iteration = s.iteration - AUTO_CONVERGE_ITERATION_DELAY; + iteration % AUTO_CONVERGE_ITERATION_INCREASE == 0 + } + } + fn memory_copy_iterations( vm: &mut Vm, socket: &mut SocketStream, @@ -1156,6 +1175,18 @@ impl Vmm { let mut iteration_table; loop { + // todo: check if auto-converge is enabled at all? + if Self::can_increase_autoconverge_step(s) && vm.throttle_percent() < AUTO_CONVERGE_MAX + { + let current_throttle = vm.throttle_percent(); + let new_throttle = current_throttle + AUTO_CONVERGE_STEP_SIZE; + let new_throttle = std::cmp::min(new_throttle, AUTO_CONVERGE_MAX); + log::info!("Increasing auto-converge: {new_throttle}%"); + if new_throttle != current_throttle { + vm.set_throttle_percent(new_throttle); + } + } + // Update the start time of the iteration s.iteration_start_time = Instant::now(); @@ -1216,6 +1247,12 @@ impl Vmm { s.pages_per_second = s.current_dirty_pages * 1000 / s.iteration_cost_time.as_millis() as u64; } + debug!( + "iteration {}: cost={}ms, throttle={}%", + s.iteration, + s.iteration_cost_time.as_millis(), + vm.throttle_percent() + ); } Ok(iteration_table) @@ -1270,7 +1307,13 @@ impl Vmm { info!("Entering downtime phase"); s.downtime_start = Instant::now(); + // End throttle thread + info!("stopping vcpu thread"); + vm.stop_vcpu_throttling(); + info!("stopped vcpu thread"); + info!("pausing VM"); vm.pause()?; + info!("paused VM"); // Send last batch of dirty pages let mut final_table = vm.dirty_log()?; diff --git a/vmm/src/vcpu_throttling.rs b/vmm/src/vcpu_throttling.rs new file mode 100644 index 0000000000..7e74b702d5 --- /dev/null +++ b/vmm/src/vcpu_throttling.rs @@ -0,0 +1,604 @@ +// Copyright © 2025 Cyberus Technology GmbH +// +// SPDX-License-Identifier: Apache-2.0 + +//! # vCPU throttling for Auto Converging +//! +//! vCPU throttling is crucial to reach a reasonable downtime when using a +//! precopy strategy for live-migration of VMs with memory-intensive workloads. +//! Auto converge means an increasing vCPU throttling over time until the memory +//! delta is small enough for the migration thread(s) to perform the switch-over +//! to the new host. +//! +//! 
Therefore, the migration thread(s) use this thread to help them reach their +//! goal. Next to typical lifecycle management, this thread must fulfill various +//! requirements to ensure a minimal downtime. +//! +//! ## Thread Requirements +//! - Needs to be able to gracefully wait for work. +//! - Must be able to exit gracefully. +//! - Must be able to cancel any work and return to its init state to support +//! live-migration cancellation and restart of live-migrations. +//! - Must not block the migration thread(s) whenever possible, to facilitate +//! fast live-migrations with short downtimes. +//! - Must be interruptible during a sleep phase to not block the migration +//! thread(s). +//! - Must not confuse or hinder the migration thread(s) regarding +//! pause()/resume() operations. Context: migration thread shuts down the +//! vCPUs for the handover. The throttle thread must not restart the vCPUs +//! again. + +use std::cell::Cell; +use std::cmp::min; +use std::sync::mpsc::RecvTimeoutError; +use std::sync::{Arc, Mutex, mpsc}; +use std::thread; +use std::thread::JoinHandle; +use std::time::{Duration, Instant}; + +use vm_migration::Pausable; + +use crate::cpu::CpuManager; + +/// The possible command of the thread, i.e., the current state. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum ThrottleCommand { + /// Waiting for next event. + Waiting, + /// Ongoing vCPU throttling. + /// + /// The inner value shows the current throttling percentage in range `1..=99`. + Throttling(u8 /* `1..=99` */), + /// Thread is shutting down gracefully. + Exiting, +} + +/// Helper to adapt the throttling timeslice as we go, depending on the time it +/// takes to pause() and resume() all vCPUs. +#[derive(Debug)] +struct TimesliceContext { + current_timeslice: Duration, + /// Duration it took to pause() all vCPUs on the previous iteration. + previous_pause_duration: Duration, + /// Duration it took to resume() all vCPUs on the previous iteration. + previous_resume_duration: Duration, +} + +impl TimesliceContext { + /// The initial timeslice for a throttling cycle (vCPU pause & resume). + const INITIAL_TIMESLICE: Duration = Duration::from_millis(100); + + /// The minimal value for the operations. + /// + /// Any value smaller than this is upgraded to this to prevent math + /// exceptions during timing calculations. + const MIN_DURATION: Duration = Duration::from_millis(1); + + /// Maximum time slice. This should not be too big. + /// + /// Otherwise, for example: Assuming we have 10% throttling and + /// 2000ms time slice, then the WM will be unresponsive for + /// 200ms every 1800ms. This is not convenient. /// + const MAX_TIMESLICE: Duration = Duration::from_millis(800); + + /// Creates a new instance with [`Self::INITIAL_TIMESLICE`]. + fn new() -> Self { + Self { + current_timeslice: Self::INITIAL_TIMESLICE, + previous_pause_duration: Self::MIN_DURATION, + previous_resume_duration: Self::MIN_DURATION, + } + } + + /// Updates the timeslice. + fn update_timeslice(&mut self) { + // CpuManager::pause() plus CpuManager::resume() without additional delay is the shortest + // we can get. + let one_percent = self.previous_pause_duration + self.previous_resume_duration; + self.current_timeslice = one_percent * 100; + self.current_timeslice = min(self.current_timeslice, Self::MAX_TIMESLICE); + } + + /// Calculates the sleep durations for after the `pause()` and `resume()` operations with + /// the current `timeslice`. 
+ /// + /// It uses the `timeslice` that was calculated on the previous + /// invocation of [`Self::update_timeslice`]. + fn calc_sleep_durations( + &mut self, + percentage: u64, + ) -> ( + Duration, /* after pause */ + Duration, /* after resume */ + ) { + assert!(percentage <= 100); + assert!(percentage > 0); + + let timeslice_ms = self.current_timeslice.as_millis() as u64; + let wait_ms_after_pause_ms = timeslice_ms * percentage / 100; + let wait_ms_after_resume_ms = timeslice_ms - wait_ms_after_pause_ms; + + let wait_ms_after_pause_ms = + wait_ms_after_pause_ms.saturating_sub(self.previous_pause_duration.as_millis() as u64); + let wait_ms_after_resume_ms = wait_ms_after_resume_ms + .saturating_sub(self.previous_resume_duration.as_millis() as u64); + + ( + Duration::from_millis(wait_ms_after_pause_ms), + Duration::from_millis(wait_ms_after_resume_ms), + ) + } + + /// Set the previous pause duration. + /// + /// In case this is below [`Self::MIN_DURATION`], we upgrade it to [`Self::MIN_DURATION`]. + pub fn set_previous_pause_duration(&mut self, mut duration: Duration) { + if duration < Self::MIN_DURATION { + duration = Self::MIN_DURATION + } + + self.previous_pause_duration = duration; + } + + /// Set the duration it took to `resume()` all vCPUs on the previous iteration. + /// + /// In case this is below [`Self::MIN_DURATION`], we upgrade it to [`Self::MIN_DURATION`]. + pub fn set_previous_resume_duration(&mut self, mut duration: Duration) { + if duration < Self::MIN_DURATION { + duration = Self::MIN_DURATION + } + self.previous_resume_duration = duration; + } +} + +/// Context of the vCPU throttle thread. +// The main justification for this dedicated type is to split the thread +// functions from the higher-level control API. +// TODO seccomp is missing +pub struct ThrottleWorker { + handle: Option>, +} + +impl ThrottleWorker { + /// This should not be named "vcpu*" as libvirt fails when + /// iterating the vCPU threads then. Fix this first in libvirt! + const THREAD_NAME: &'static str = "throttle-vcpu"; + + /// Executes the provided callback and goes to sleep until the specified + /// `sleep_duration` passed. + /// + /// The time to execute the callback itself is not taken into account + /// when sleeping for `sleep_duration`. Therefore, the callback is + /// supposed to be quick (a couple of milliseconds). + /// + /// The thread is interruptible during the sleep phase when the `receiver` + /// receives a new [`ThrottleCommand`]. + /// + /// # Arguments + /// - `callback`: Function to run + /// - `set_callback_duration`: Set the duration to execute the callback. + /// - `sleep_duration`: Duration this function takes at most, including + /// running the `callback`. + /// - `receiver`: Receiving end of the channel to the migration managing + /// thread. + fn execute_and_wait_interruptible( + callback: &impl Fn(), + mut set_callback_duration: impl FnMut(Duration), + sleep_duration: Duration, + receiver: &mpsc::Receiver, + ) -> Option { + let begin = Instant::now(); + callback(); + let cb_duration = begin.elapsed(); + // Help to adjust the timeslice in the next cycle. + set_callback_duration(cb_duration); + + // It might happen that sometimes we get interrupted during a sleep phase + // with a new higher throttle percentage but this is negligible. For an + // auto-converge cycle, there are typically only ~10 steps involved over + // a time frame from a couple of seconds up to a couple of minutes. 
+ match receiver.recv_timeout(sleep_duration) { + Ok(next_task) => Some(next_task), + Err(RecvTimeoutError::Timeout) => None, + Err(RecvTimeoutError::Disconnected) => { + panic!("thread and channel should exit gracefully") + } + } + } + + /// Executes one throttling step: either pause or resume of vCPUs. + /// + /// Runs the given callback, then waits for the specified duration, unless + /// interrupted by a new [`ThrottleCommand`]. + /// + /// # Behavior + /// - Runs the provided `callback` immediately. + /// - Waits up to `duration` for new commands on the `receiver`. + /// - If no command arrives before the timeout, this step completes + /// normally and returns `None`. + /// - If a [`ThrottleCommand::Throttling`] arrives, updates the current + /// throttle percentage in `current_throttle` and continues with the + /// loop. Returns `None`. + /// - If a [`ThrottleCommand::Waiting`] or [`ThrottleCommand::Exiting`] + /// arrives, this command is forwarded to the caller. + /// + /// # Arguments + /// - `callback`: Function to run (e.g., pause or resume vCPUs). + /// - `set_callback_duration`: Set the duration to execute the callback. + /// - `receiver`: Channel for receiving new [`ThrottleCommand`]s. + /// - `current_throttle`: Mutable reference to the current throttle + /// percentage (updated on [`ThrottleCommand::Throttling`]). + /// + /// # Returns + /// - `None` if the throttling cycle should continue. + /// - `Some(ThrottleCommand::Waiting | ThrottleCommand::Exiting)` if + /// throttling should stop. + fn throttle_step( + callback: &F, + set_callback_duration: impl FnMut(Duration), + duration: Duration, + receiver: &mpsc::Receiver, + current_throttle: &mut u64, + ) -> Option + where + F: Fn(), + { + let maybe_task = Self::execute_and_wait_interruptible( + callback, + set_callback_duration, + duration, + receiver, + ); + match maybe_task { + None => None, + Some(ThrottleCommand::Throttling(next)) => { + // A new throttle value is only applied at the end of a full + // throttling cycle. This is fine and negligible in a series of + // (tens of) thousands of cycles. + *current_throttle = next as u64; + None + } + Some(cmd @ (ThrottleCommand::Exiting | ThrottleCommand::Waiting)) => Some(cmd), + } + } + + /// Helper for [`Self::control_loop`] that runs the actual throttling loop. + /// + /// This function returns the next [`ThrottleCommand`] **only** if the thread + /// stopped the vCPU throttling. + fn throttle_loop( + receiver: &mpsc::Receiver, + initial_throttle: u8, + callback_pause_vcpus: &impl Fn(), + callback_resume_vcpus: &impl Fn(), + ) -> ThrottleCommand { + // The current throttle value, as long as the thread is throttling. + let mut current_throttle = initial_throttle as u64; + let mut timeslice_ctx = TimesliceContext::new(); + + loop { + // Catch logic bug: We should have exited in this case already. + assert_ne!(current_throttle, 0); + assert!(current_throttle < 100); + + let (wait_ms_after_pause, wait_ms_after_resume) = + timeslice_ctx.calc_sleep_durations(current_throttle); + + // pause vCPUs + if let Some(cmd) = Self::throttle_step( + callback_pause_vcpus, + |new_duration| timeslice_ctx.set_previous_pause_duration(new_duration), + wait_ms_after_pause, + receiver, + &mut current_throttle, + ) { + // TODO: future optimization + // Prevent unnecessary resume() here when the migration thread + // performs .pause() right after anyway. We could make .pause() and + // .resume() idempotent. 
+ callback_resume_vcpus(); + // We only exit here in case if ThrottleCommand::Waiting or ::Exiting + return cmd; + } + + // resume vCPUs + if let Some(cmd) = Self::throttle_step( + callback_resume_vcpus, + |new_duration| timeslice_ctx.set_previous_resume_duration(new_duration), + wait_ms_after_resume, + receiver, + &mut current_throttle, + ) { + // We only exit here in case if ThrottleCommand::Waiting or ::Exiting + return cmd; + } + + // Update timeslice for next cycle. This way, we can closely match the expected + // percentage for pause() and resume(). + timeslice_ctx.update_timeslice(); + } + } + + /// Implements the control loop of the thread. + /// + /// It wraps the actual throttling with the necessary thread lifecycle + /// management. + fn control_loop( + receiver: mpsc::Receiver, + callback_pause_vcpus: impl Fn() + Send + 'static, + callback_resume_vcpus: impl Fn() + Send + 'static, + ) -> impl Fn() { + move || { + // In the outer loop, we gracefully wait for commands. + 'control: loop { + let thread_task = receiver.recv().expect("channel should not be closed"); + match thread_task { + ThrottleCommand::Exiting => { + break 'control; + } + ThrottleCommand::Waiting => { + continue 'control; + } + ThrottleCommand::Throttling(initial_throttle) => { + let next_task = Self::throttle_loop( + &receiver, + initial_throttle, + &callback_pause_vcpus, + &callback_resume_vcpus, + ); + if next_task == ThrottleCommand::Exiting { + break 'control; + } + // else: thread is in Waiting state + } + } + } + debug!("thread exited gracefully"); + } + } + + /// Spawns a new thread. + fn spawn( + receiver: mpsc::Receiver, + callback_pause_vcpus: impl Fn() + Send + 'static, + callback_resume_vcpus: impl Fn() + Send + 'static, + ) -> Self { + let handle = { + let thread_fn = + Self::control_loop(receiver, callback_pause_vcpus, callback_resume_vcpus); + thread::Builder::new() + .name(String::from(Self::THREAD_NAME)) + .spawn(thread_fn) + .expect("should spawn thread") + }; + + Self { + handle: Some(handle), + } + } +} + +impl Drop for ThrottleWorker { + fn drop(&mut self) { + // Note: The thread handle must send the shutdown command first. + if let Some(handle) = self.handle.take() { + handle.join().expect("thread should have succeeded"); + } + } +} + +/// Handler for controlling the vCPU throttle thread. +/// +/// vCPU throttling is needed for live-migration of memory-intensive workloads. +/// The current design assumes that all vCPUs are throttled equally. +/// +/// # Transitions +/// - `Waiting` -> `Throttling(x %)`, `Exit` +/// - `Throttling(x %)` -> `Exit`, `Waiting`, `Throttling(y %)` +/// - `Exiting` +pub struct ThrottleThreadHandle { + /// Thread state wrapped by synchronization primitives. + state_sender: mpsc::Sender, + /// Current throttle value. + /// + /// This is the last throttle value that was sent to the + /// thread. + current_throttle: Cell, + /// The underlying thread handle. Option to have more control over when it is dropped. + throttle_thread: Option, +} + +impl ThrottleThreadHandle { + /// Spawns a new thread and returning a handle to it. 
+ /// + /// # Parameters + /// - `cpu_manager`: CPU manager to pause and resume vCPUs + pub fn new_from_cpu_manager(cpu_manager: &Arc>) -> Self { + let callback_pause_vcpus = { + let cpu_manager = cpu_manager.clone(); + Box::new(move || cpu_manager.lock().unwrap().pause().unwrap()) + }; + + let callback_resume_vcpus = { + let cpu_manager = cpu_manager.clone(); + Box::new(move || cpu_manager.lock().unwrap().resume().unwrap()) + }; + + Self::new(callback_pause_vcpus, callback_resume_vcpus) + } + + /// Spawns a new thread and returning a handle to it. + /// + /// This function returns when the thread gracefully arrived in + /// [`ThrottleCommand::Waiting`]. + /// + /// # Parameters + /// - `callback_pause_vcpus`: Function putting all vCPUs into pause state. The + /// function must not perform any artificial delay itself. + /// - `callback_resume_vcpus`: Function putting all vCPUs back into running + /// state. The function must not perform any artificial delay itself. + fn new( + callback_pause_vcpus: Box, + callback_resume_vcpus: Box, + ) -> Self { + // Channel used for synchronization. + let (sender, receiver) = mpsc::channel::(); + + let thread = ThrottleWorker::spawn(receiver, callback_pause_vcpus, callback_resume_vcpus); + + Self { + state_sender: sender, + current_throttle: Cell::new(0), + throttle_thread: Some(thread), + } + } + + /// Set's the throttle percentage to a value in range `0..=99` and updates + /// the thread's state. + /// + /// Setting the value back to `0` equals setting the thread back into + /// [`ThrottleCommand::Waiting`]. + /// + /// In case of an ongoing throttling cycle (vCPU pause & resume), any new + /// throttling percentage will be applied no later than when the current cycle + /// ends. + /// + /// # Panic + /// Panics, if `percent_new` is not in range `0..=99`. + pub fn set_throttle_percent(&self, percent_new: u8) { + assert!( + percent_new <= 100, + "setting a percentage of 100 or above is not allowed: {percent_new}%" + ); + + // We have no problematic race condition here as in normal operation + // there is exactly one thread calling these functions. + let percent_old = self.throttle_percent(); + + // Return early, no action needed. + if percent_old == percent_new { + return; + } + + if percent_new == 0 { + self.state_sender + .send(ThrottleCommand::Waiting) + .expect("channel should not be closed"); + } else { + self.state_sender + .send(ThrottleCommand::Throttling(percent_new)) + .expect("channel should not be closed"); + }; + + self.current_throttle.set(percent_new); + } + + /// Get the current throttle percentage in range `0..=99`. + /// + /// Please note that the value is not synchronized. + pub fn throttle_percent(&self) -> u8 { + self.current_throttle.get() + } + + /// Stops and terminates the thread gracefully. + /// + /// Waits for the thread to finish. This function **must** be called before + /// the migration thread(s) do anything with the CPU manager to prevent + /// odd states. + pub fn shutdown(&mut self) { + let begin = Instant::now(); + + { + // drop thread; ensure that the channel is still alive when it is dropped + if let Some(worker) = self.throttle_thread.take() { + self.state_sender + .send(ThrottleCommand::Exiting) + .expect("channel should not be closed"); + + // Ensure the sender is still living when this is dropped. 
+ drop(worker); + } + } + + let elapsed = begin.elapsed(); + if elapsed > Duration::from_millis(20) { + warn!( + "shutting down thread takes too long ({} ms): this increases the downtime!", + elapsed.as_millis() + ); + } + } +} + +impl Drop for ThrottleThreadHandle { + fn drop(&mut self) { + self.shutdown(); + } +} + +#[cfg(test)] +mod tests { + use std::sync::atomic::{AtomicBool, Ordering}; + use std::thread::sleep; + + use super::*; + + // The test is successful if it does not get stuck. Then, the thread exits + // gracefully. + #[test] + fn test_vcpu_throttling_thread_lifecycle() { + for _ in 0..5 { + // State transitions: Waiting -> Exit + { + let mut handler = ThrottleThreadHandle::new(Box::new(|| {}), Box::new(|| {})); + + // The test is successful if it does not get stuck. + handler.shutdown(); + } + + // Dummy CpuManager + let cpus_throttled = Arc::new(AtomicBool::new(false)); + let callback_pause_vcpus = { + let cpus_running = cpus_throttled.clone(); + Box::new(move || { + let old = cpus_running.swap(true, Ordering::SeqCst); + assert!(!old); + }) + }; + let callback_resume_vcpus = { + let cpus_running = cpus_throttled.clone(); + Box::new(move || { + let old = cpus_running.swap(false, Ordering::SeqCst); + assert!(old); + }) + }; + + // State transitions: Waiting -> Throttle -> Waiting -> Throttle -> Exit + { + let mut handler = + ThrottleThreadHandle::new(callback_pause_vcpus, callback_resume_vcpus); + handler.set_throttle_percent(5); + sleep(TimesliceContext::INITIAL_TIMESLICE); + handler.set_throttle_percent(10); + sleep(TimesliceContext::INITIAL_TIMESLICE); + + // Assume we aborted vCPU throttling (or the live-migration at all). + handler.set_throttle_percent(0 /* reset to waiting */); + handler.set_throttle_percent(5); + sleep(TimesliceContext::INITIAL_TIMESLICE); + handler.set_throttle_percent(10); + sleep(TimesliceContext::INITIAL_TIMESLICE); + + // The test is successful if we don't have a panic here due to a + // closed channel. + for _ in 0..10 { + handler.shutdown(); + sleep(Duration::from_millis(1)); + } + + // The test is successful if it does not get stuck. + drop(handler); + } + } + } +} diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 836feb4707..58fbae6f5a 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -95,6 +95,7 @@ use crate::migration::get_vm_snapshot; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use crate::migration::url_to_file; use crate::migration::{SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE, url_to_path}; +use crate::vcpu_throttling::ThrottleThreadHandle; #[cfg(feature = "fw_cfg")] use crate::vm_config::FwCfgConfig; use crate::vm_config::{ @@ -521,6 +522,7 @@ pub struct Vm { hypervisor: Arc, stop_on_boot: bool, load_payload_handle: Option>>, + vcpu_throttler: ThrottleThreadHandle, } impl Vm { @@ -807,6 +809,10 @@ impl Vm { VmState::Created }; + // TODO we could also spawn the thread when a migration with auto-converge starts. + // Probably this is the better design. + let vcpu_throttler = ThrottleThreadHandle::new_from_cpu_manager(&cpu_manager); + Ok(Vm { #[cfg(feature = "tdx")] kernel, @@ -826,6 +832,7 @@ impl Vm { hypervisor, stop_on_boot, load_payload_handle, + vcpu_throttler, }) } @@ -979,6 +986,31 @@ impl Vm { Ok(numa_nodes) } + /// Set's the throttle percentage to a value in range `0..=99`. + /// + /// Setting the value back to `0` brings the thread back into a waiting + /// state. + /// + /// # Panic + /// Panics, if `percent_new` is not in range `0..=99`. 
+ pub fn set_throttle_percent(&self, percent: u8 /* 1..=99 */) { + self.vcpu_throttler.set_throttle_percent(percent); + } + + /// Get the current throttle percentage in range `0..=99`. + /// + /// Please note that the value is not synchronized. + pub fn throttle_percent(&self) -> u8 { + self.vcpu_throttler.throttle_percent() + } + + /// Stops and terminates the thread gracefully. + /// + /// Waits for the thread to finish. + pub fn stop_vcpu_throttling(&mut self) { + self.vcpu_throttler.shutdown(); + } + #[allow(clippy::too_many_arguments)] pub fn new( vm_config: Arc>, From 510546bea2403631c74b5cd34035b6e252f55775 Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Thu, 11 Sep 2025 13:25:21 +0200 Subject: [PATCH 198/294] console: add tcp option In addition to configuration options like pty, file, tty, ... we allow setting the serial device to be accessed via some open TCP port on the host. Signed-off-by: Stefan Kober --- src/main.rs | 2 ++ vmm/src/config.rs | 23 +++++++++++++++++++++++ vmm/src/console_devices.rs | 3 +++ vmm/src/lib.rs | 2 ++ vmm/src/vm_config.rs | 4 ++++ 5 files changed, 34 insertions(+) diff --git a/src/main.rs b/src/main.rs index d83a90683a..290ef4d17e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1002,12 +1002,14 @@ mod unit_tests { mode: ConsoleOutputMode::Null, iommu: false, socket: None, + url: None, }, console: ConsoleConfig { file: None, mode: ConsoleOutputMode::Tty, iommu: false, socket: None, + url: None, }, #[cfg(target_arch = "x86_64")] debug_console: DebugConsoleConfig::default(), diff --git a/vmm/src/config.rs b/vmm/src/config.rs index cab06cc153..dbb804e8ed 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -182,6 +182,9 @@ pub enum ValidationError { /// Missing file value for console #[error("Path missing when using file console mode")] ConsoleFileMissing, + /// Missing TCP address for console + #[error("Address missing when using TCP console mode")] + ConsoleTcpAddressMissing, /// Missing socket path for console #[error("Path missing when using socket console mode")] ConsoleSocketPathMissing, @@ -1825,11 +1828,13 @@ impl ConsoleConfig { .add_valueless("null") .add("file") .add("iommu") + .add("tcp") .add("socket"); parser.parse(console).map_err(Error::ParseConsole)?; let mut file: Option = default_consoleconfig_file(); let mut socket: Option = None; + let mut url: Option = None; let mut mode: ConsoleOutputMode = ConsoleOutputMode::Off; if parser.is_set("off") { @@ -1845,6 +1850,13 @@ impl ConsoleConfig { Some(PathBuf::from(parser.get("file").ok_or( Error::Validation(ValidationError::ConsoleFileMissing), )?)); + } else if parser.is_set("tcp") { + mode = ConsoleOutputMode::Tcp; + url = Some( + parser + .get("tcp") + .ok_or(Error::Validation(ValidationError::ConsoleTcpAddressMissing))?, + ); } else if parser.is_set("socket") { mode = ConsoleOutputMode::Socket; socket = Some(PathBuf::from(parser.get("socket").ok_or( @@ -1864,6 +1876,7 @@ impl ConsoleConfig { mode, iommu, socket, + url, }) } } @@ -3726,6 +3739,7 @@ mod tests { iommu: false, file: None, socket: None, + url: None, } ); assert_eq!( @@ -3735,6 +3749,7 @@ mod tests { iommu: false, file: None, socket: None, + url: None, } ); assert_eq!( @@ -3744,6 +3759,7 @@ mod tests { iommu: false, file: None, socket: None, + url: None, } ); assert_eq!( @@ -3753,6 +3769,7 @@ mod tests { iommu: false, file: None, socket: None, + url: None, } ); assert_eq!( @@ -3762,6 +3779,7 @@ mod tests { iommu: false, file: Some(PathBuf::from("/tmp/console")), socket: None, + url: None, } ); assert_eq!( @@ -3771,6 +3789,7 @@ 
mod tests { iommu: true, file: None, socket: None, + url: None, } ); assert_eq!( @@ -3780,6 +3799,7 @@ mod tests { iommu: true, file: Some(PathBuf::from("/tmp/console")), socket: None, + url: None, } ); assert_eq!( @@ -3789,6 +3809,7 @@ mod tests { iommu: true, file: None, socket: Some(PathBuf::from("/tmp/serial.sock")), + url: None, } ); Ok(()) @@ -4155,12 +4176,14 @@ mod tests { mode: ConsoleOutputMode::Null, iommu: false, socket: None, + url: None, }, console: ConsoleConfig { file: None, mode: ConsoleOutputMode::Tty, iommu: false, socket: None, + url: None, }, #[cfg(target_arch = "x86_64")] debug_console: DebugConsoleConfig::default(), diff --git a/vmm/src/console_devices.rs b/vmm/src/console_devices.rs index 9f8d18ae7c..dcac97fb08 100644 --- a/vmm/src/console_devices.rs +++ b/vmm/src/console_devices.rs @@ -227,6 +227,7 @@ pub(crate) fn pre_create_console_devices(vmm: &mut Vmm) -> ConsoleDeviceResult { return Err(ConsoleDeviceError::NoSocketOptionSupportForConsoleDevice); } + ConsoleOutputMode::Tcp => ConsoleOutput::Null, ConsoleOutputMode::Null => ConsoleOutput::Null, ConsoleOutputMode::Off => ConsoleOutput::Off, }, @@ -264,6 +265,7 @@ pub(crate) fn pre_create_console_devices(vmm: &mut Vmm) -> ConsoleDeviceResult ConsoleOutput::Null, ConsoleOutputMode::Null => ConsoleOutput::Null, ConsoleOutputMode::Off => ConsoleOutput::Off, }, @@ -290,6 +292,7 @@ pub(crate) fn pre_create_console_devices(vmm: &mut Vmm) -> ConsoleDeviceResult { return Err(ConsoleDeviceError::NoSocketOptionSupportForConsoleDevice); } + ConsoleOutputMode::Tcp => ConsoleOutput::Null, ConsoleOutputMode::Null => ConsoleOutput::Null, ConsoleOutputMode::Off => ConsoleOutput::Off, }, diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 19f5a5cc06..2b81a17d0c 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -2656,12 +2656,14 @@ mod unit_tests { mode: ConsoleOutputMode::Null, iommu: false, socket: None, + url: None, }, console: ConsoleConfig { file: None, mode: ConsoleOutputMode::Tty, iommu: false, socket: None, + url: None, }, #[cfg(target_arch = "x86_64")] debug_console: DebugConsoleConfig::default(), diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index 33147092eb..d7062e29ea 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -496,6 +496,7 @@ pub enum ConsoleOutputMode { Tty, File, Socket, + Tcp, Null, } @@ -507,6 +508,7 @@ pub struct ConsoleConfig { #[serde(default)] pub iommu: bool, pub socket: Option, + pub url: Option, } pub fn default_consoleconfig_file() -> Option { @@ -856,6 +858,7 @@ pub fn default_serial() -> ConsoleConfig { mode: ConsoleOutputMode::Null, iommu: false, socket: None, + url: None, } } @@ -865,6 +868,7 @@ pub fn default_console() -> ConsoleConfig { mode: ConsoleOutputMode::Tty, iommu: false, socket: None, + url: None, } } From d55233c9e84340a1f64868f312a1581ba0ec0ada Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Thu, 11 Sep 2025 14:40:22 +0200 Subject: [PATCH 199/294] serial: add Tcp enum entry Signed-off-by: Stefan Kober --- vmm/src/serial_manager.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vmm/src/serial_manager.rs b/vmm/src/serial_manager.rs index 9c4255defa..592feeb421 100644 --- a/vmm/src/serial_manager.rs +++ b/vmm/src/serial_manager.rs @@ -91,9 +91,10 @@ pub enum EpollDispatch { File = 0, Kill = 1, Socket = 2, + Tcp = 3, Unknown, } -const EPOLL_EVENTS_LEN: usize = 4; +const EPOLL_EVENTS_LEN: usize = 5; impl From for EpollDispatch { fn from(v: u64) -> Self { @@ -102,6 +103,7 @@ impl From for EpollDispatch { 0 => File, 1 => Kill, 2 => Socket, 
+ 3 => Tcp, _ => Unknown, } } @@ -345,6 +347,7 @@ impl SerialManager { .map_err(Error::Epoll)?; serial.lock().unwrap().set_out(Some(Box::new(writer))); } + EpollDispatch::Tcp => {} EpollDispatch::File => { if event.events & libc::EPOLLIN as u32 != 0 { let mut input = [0u8; 64]; From d27e491f74034fc029c9fb0619d3820002bbc49c Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Thu, 11 Sep 2025 16:07:22 +0200 Subject: [PATCH 200/294] vcpu: add sendto syscall to seccomp list Adding the TCP socket support to the serial device handling requires to add a new syscall to the seccomp filter list. Signed-off-by: Stefan Kober --- vmm/src/seccomp_filters.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index 3dacd30463..b3175ffe15 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -809,6 +809,7 @@ fn vcpu_thread_rules( (libc::SYS_rt_sigreturn, vec![]), (libc::SYS_sched_yield, vec![]), (libc::SYS_sendmsg, vec![]), + (libc::SYS_sendto, vec![]), (libc::SYS_shutdown, vec![]), (libc::SYS_sigaltstack, vec![]), (libc::SYS_tgkill, vec![]), From 271d19db8f2e028d6fac2e845d7cf6579c5c88fe Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Thu, 11 Sep 2025 16:01:26 +0200 Subject: [PATCH 201/294] serial: rename CloneUnixStream -> CloneStream Renaming the error makes it also usable for the new TCP socket support in the serial device. Signed-off-by: Stefan Kober --- vmm/src/serial_manager.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/vmm/src/serial_manager.rs b/vmm/src/serial_manager.rs index 592feeb421..532aa70519 100644 --- a/vmm/src/serial_manager.rs +++ b/vmm/src/serial_manager.rs @@ -67,9 +67,9 @@ pub enum Error { #[error("Error accepting connection")] AcceptConnection(#[source] io::Error), - /// Cannot clone the UnixStream - #[error("Error cloning UnixStream")] - CloneUnixStream(#[source] io::Error), + /// Cannot clone the Stream + #[error("Error cloning Stream")] + CloneStream(#[source] io::Error), /// Cannot shutdown the connection #[error("Error shutting down a connection")] @@ -330,10 +330,9 @@ impl SerialManager { let (unix_stream, _) = listener.accept().map_err(Error::AcceptConnection)?; let writer = - unix_stream.try_clone().map_err(Error::CloneUnixStream)?; - reader = Some( - unix_stream.try_clone().map_err(Error::CloneUnixStream)?, - ); + unix_stream.try_clone().map_err(Error::CloneStream)?; + reader = + Some(unix_stream.try_clone().map_err(Error::CloneStream)?); epoll::ctl( epoll_fd, From 3e1d8600b9d9509ffe21922c79e1c56b0a32565b Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Thu, 11 Sep 2025 15:01:17 +0200 Subject: [PATCH 202/294] serial: add serial tcp implementation Similar to the unix socket implementation, we allow a user to set a port were the serial is reachable. 
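For illustration, a minimal client sketch (not part of this change; the
address and port are placeholders and must match the value passed via the
new tcp= serial option) that follows the guest console over this endpoint
could look like:

    use std::io::{BufRead, BufReader};
    use std::net::TcpStream;

    fn main() -> std::io::Result<()> {
        // Placeholder address; use the host/port the serial device was bound to.
        let stream = TcpStream::connect("127.0.0.1:4555")?;
        // Print the guest console output line by line.
        for line in BufReader::new(stream).lines() {
            println!("{}", line?);
        }
        Ok(())
    }

Writing to the same stream feeds the guest's serial input, mirroring the
reader side added below.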
Signed-off-by: Stefan Kober --- vmm/src/console_devices.rs | 16 ++++++++- vmm/src/device_manager.rs | 10 +++++- vmm/src/serial_manager.rs | 74 ++++++++++++++++++++++++++++++++++---- 3 files changed, 92 insertions(+), 8 deletions(-) diff --git a/vmm/src/console_devices.rs b/vmm/src/console_devices.rs index dcac97fb08..d1a13f79e0 100644 --- a/vmm/src/console_devices.rs +++ b/vmm/src/console_devices.rs @@ -12,6 +12,7 @@ use std::fs::{File, OpenOptions, read_link}; use std::mem::zeroed; +use std::net::TcpListener; use std::os::fd::{AsRawFd, FromRawFd, RawFd}; use std::os::unix::fs::OpenOptionsExt; use std::os::unix::net::UnixListener; @@ -40,6 +41,10 @@ pub enum ConsoleDeviceError { #[error("No socket option support for console device")] NoSocketOptionSupportForConsoleDevice, + /// Error parsing the TCP address + #[error("Wrong TCP address format: {0}")] + WrongTcpAddressFormat(std::string::String), + /// Error setting pty raw mode #[error("Error setting pty raw mode")] SetPtyRaw(#[source] vmm_sys_util::errno::Error), @@ -62,6 +67,7 @@ pub enum ConsoleOutput { Tty(Arc), Null, Socket(Arc), + Tcp(Arc), Off, } @@ -265,7 +271,15 @@ pub(crate) fn pre_create_console_devices(vmm: &mut Vmm) -> ConsoleDeviceResult ConsoleOutput::Null, + ConsoleOutputMode::Tcp => { + let url = vmconfig.serial.url.as_ref().unwrap(); + let socket_addr: std::net::SocketAddr = url + .parse() + .map_err(|_| ConsoleDeviceError::WrongTcpAddressFormat(url.to_string()))?; + let listener = TcpListener::bind(socket_addr) + .map_err(ConsoleDeviceError::CreateConsoleDevice)?; + ConsoleOutput::Tcp(Arc::new(listener)) + } ConsoleOutputMode::Null => ConsoleOutput::Null, ConsoleOutputMode::Off => ConsoleOutput::Off, }, diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 577c82eab1..ffca459d0f 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -2350,6 +2350,9 @@ impl DeviceManager { ConsoleOutput::Socket(_) => { return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice); } + ConsoleOutput::Tcp(_) => { + return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice); + } ConsoleOutput::Null => Endpoint::Null, ConsoleOutput::Off => return Ok(None), }; @@ -2424,12 +2427,16 @@ impl DeviceManager { | ConsoleOutput::Null | ConsoleOutput::Pty(_) | ConsoleOutput::Socket(_) => None, + ConsoleOutput::Tcp(_) => None, }; if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) { let serial = self.add_serial_device(interrupt_manager, serial_writer)?; self.serial_manager = match console_info.serial_main_fd { - ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) => { + ConsoleOutput::Pty(_) + | ConsoleOutput::Tty(_) + | ConsoleOutput::Socket(_) + | ConsoleOutput::Tcp(_) => { let serial_manager = SerialManager::new( serial, console_info.serial_main_fd, @@ -2462,6 +2469,7 @@ impl DeviceManager { | ConsoleOutput::Null | ConsoleOutput::Pty(_) | ConsoleOutput::Socket(_) => None, + ConsoleOutput::Tcp(_) => None, }; if let Some(writer) = debug_console_writer { let _ = self.add_debug_console_device(writer)?; diff --git a/vmm/src/serial_manager.rs b/vmm/src/serial_manager.rs index 532aa70519..c1314b350e 100644 --- a/vmm/src/serial_manager.rs +++ b/vmm/src/serial_manager.rs @@ -5,7 +5,7 @@ use std::fs::File; use std::io::Read; -use std::net::Shutdown; +use std::net::{Shutdown, TcpStream}; use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; use std::os::unix::net::UnixStream; use std::panic::AssertUnwindSafe; @@ -167,6 +167,7 @@ impl SerialManager { } fd.as_raw_fd() } + 
ConsoleOutput::Tcp(ref fd) => fd.as_raw_fd(), _ => return Ok(None), }; @@ -181,10 +182,14 @@ impl SerialManager { ) .map_err(Error::Epoll)?; - let epoll_fd_data = if let ConsoleOutput::Socket(_) = output { - EpollDispatch::Socket - } else { - EpollDispatch::File + let epoll_fd_data = match output { + ConsoleOutput::File(_) => EpollDispatch::File, + ConsoleOutput::Pty(_) => EpollDispatch::File, + ConsoleOutput::Tty(_) => EpollDispatch::File, + ConsoleOutput::Null => EpollDispatch::File, + ConsoleOutput::Off => EpollDispatch::File, + ConsoleOutput::Socket(_) => EpollDispatch::Socket, + ConsoleOutput::Tcp(_) => EpollDispatch::Tcp, }; epoll::ctl( @@ -261,6 +266,7 @@ impl SerialManager { let serial = self.serial.clone(); let pty_write_out = self.pty_write_out.clone(); let mut reader: Option = None; + let mut reader_tcp: Option = None; // In case of PTY, we want to be able to detect a connection on the // other end of the PTY. This is done by detecting there's no event @@ -346,7 +352,40 @@ impl SerialManager { .map_err(Error::Epoll)?; serial.lock().unwrap().set_out(Some(Box::new(writer))); } - EpollDispatch::Tcp => {} + EpollDispatch::Tcp => { + // New connection request arrived. + // Shutdown the previous connection, if any + if let Some(ref previous_reader) = reader_tcp { + previous_reader + .shutdown(Shutdown::Both) + .map_err(Error::AcceptConnection)?; + } + + let ConsoleOutput::Tcp(ref listener) = in_file else { + unreachable!(); + }; + + // Events on the listening socket will be connection requests. + // Accept them, create a reader and a writer. + let (tcp_stream, _) = + listener.accept().map_err(Error::AcceptConnection)?; + let writer = + tcp_stream.try_clone().map_err(Error::CloneStream)?; + reader_tcp = + Some(tcp_stream.try_clone().map_err(Error::CloneStream)?); + + epoll::ctl( + epoll_fd, + epoll::ControlOptions::EPOLL_CTL_ADD, + tcp_stream.into_raw_fd(), + epoll::Event::new( + epoll::Events::EPOLLIN, + EpollDispatch::File as u64, + ), + ) + .map_err(Error::Epoll)?; + serial.lock().unwrap().set_out(Some(Box::new(writer))); + } EpollDispatch::File => { if event.events & libc::EPOLLIN as u32 != 0 { let mut input = [0u8; 64]; @@ -373,6 +412,29 @@ impl SerialManager { 0 } } + ConsoleOutput::Tcp(_) => { + if let Some(mut serial_reader) = reader_tcp.as_ref() + { + let count = serial_reader + .read(&mut input) + .map_err(Error::ReadInput)?; + if count == 0 { + info!("Remote end closed serial socket"); + serial_reader + .shutdown(Shutdown::Both) + .map_err(Error::ShutdownConnection)?; + reader_tcp = None; + serial + .as_ref() + .lock() + .unwrap() + .set_out(None); + } + count + } else { + 0 + } + } ConsoleOutput::Pty(file) | ConsoleOutput::Tty(file) => { (&**file) .read(&mut input) From b70c9a662fa4a8c4d84767d219bc66f00e0bb347 Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Tue, 16 Sep 2025 13:33:52 +0200 Subject: [PATCH 203/294] serial: add tcp serial option to help Signed-off-by: Stefan Kober --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 290ef4d17e..478ad4aa9c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -420,7 +420,7 @@ fn get_cli_options_sorted( .default_value("true"), Arg::new("serial") .long("serial") - .help("Control serial port: off|null|pty|tty|file=|socket=") + .help("Control serial port: off|null|pty|tty|file=|socket=|tcp=") .default_value("null") .group("vm-config"), Arg::new("tpm") From 8735fc5f6457c7b9387b4dec9054eb839e242591 Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Tue, 16 Sep 2025 14:13:46 +0200 
Subject: [PATCH 204/294] serial: implement writer combiner When using the TCP serial mode, we also want to write the serial output to a file. We use a generic write combiner that would allow us adding even more writers. Signed-off-by: Stefan Kober --- vmm/src/serial_manager.rs | 71 +++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 7 deletions(-) diff --git a/vmm/src/serial_manager.rs b/vmm/src/serial_manager.rs index c1314b350e..5d5b6e601f 100644 --- a/vmm/src/serial_manager.rs +++ b/vmm/src/serial_manager.rs @@ -3,8 +3,9 @@ // SPDX-License-Identifier: Apache-2.0 // +use std::collections::HashMap; use std::fs::File; -use std::io::Read; +use std::io::{Read, Write}; use std::net::{Shutdown, TcpStream}; use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; use std::os::unix::net::UnixStream; @@ -109,6 +110,58 @@ impl From for EpollDispatch { } } +/// A thread-safe writer that fans out to multiple keyed writers. Allows for +/// bundling different kinds of writers for the serial device, e.g. writing to +/// a TCP socket and a file. +#[derive(Clone)] +pub struct FanoutWriter { + writers: Arc>>>, +} + +impl FanoutWriter { + pub fn new() -> Self { + FanoutWriter { + writers: Arc::new(Mutex::new(HashMap::new())), + } + } + + pub fn add_writer(&self, key: String, writer: W) { + let mut writers = self.writers.lock().unwrap(); + writers.insert(key, Box::new(writer)); + } + + pub fn remove_writer(&self, key: &str) -> Option> { + let mut writers = self.writers.lock().unwrap(); + writers.remove(key) + } +} + +impl Write for FanoutWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + let mut writers = self.writers.lock().unwrap(); + let mut result: io::Result = Ok(buf.len()); + + for (i, w) in writers.values_mut().enumerate() { + let r = w.write(buf); + if i == 0 { + result = r; + } else if let Err(e) = r { + return Err(e); + } + } + + result + } + + fn flush(&mut self) -> io::Result<()> { + let mut writers = self.writers.lock().unwrap(); + for w in writers.values_mut() { + w.flush()?; + } + Ok(()) + } +} + pub struct SerialManager { #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))] serial: Arc>, @@ -280,8 +333,15 @@ impl SerialManager { .name("serial-manager".to_string()) .spawn(move || { std::panic::catch_unwind(AssertUnwindSafe(move || { + let write_distributor = FanoutWriter::new(); + let mut events = [epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; + serial + .as_ref() + .lock() + .unwrap() + .set_out(Some(Box::new(write_distributor.clone()))); loop { let num_events = match epoll::wait(epoll_fd, timeout, &mut events[..]) { @@ -359,6 +419,7 @@ impl SerialManager { previous_reader .shutdown(Shutdown::Both) .map_err(Error::AcceptConnection)?; + write_distributor.remove_writer("tcp"); } let ConsoleOutput::Tcp(ref listener) = in_file else { @@ -384,7 +445,7 @@ impl SerialManager { ), ) .map_err(Error::Epoll)?; - serial.lock().unwrap().set_out(Some(Box::new(writer))); + write_distributor.add_writer("tcp".into(), writer); } EpollDispatch::File => { if event.events & libc::EPOLLIN as u32 != 0 { @@ -424,11 +485,7 @@ impl SerialManager { .shutdown(Shutdown::Both) .map_err(Error::ShutdownConnection)?; reader_tcp = None; - serial - .as_ref() - .lock() - .unwrap() - .set_out(None); + write_distributor.remove_writer("tcp"); } count } else { From 8a3e2c75d559ec43a6bdb9bdbe9e0dfd21afa227 Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Wed, 17 Sep 2025 08:42:29 +0200 Subject: [PATCH 205/294] config: allow additional file when mode TCP When using the newly 
added TCP serial mode, we allow specifying an additional file to log into. This allows users to access the complete bootlog of a VM, as the TCP socket does not buffer anything. Signed-off-by: Stefan Kober --- vmm/src/config.rs | 18 ++++++++++++------ vmm/src/console_devices.rs | 10 ++++++++-- vmm/src/device_manager.rs | 8 ++++---- vmm/src/serial_manager.rs | 8 ++++---- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/vmm/src/config.rs b/vmm/src/config.rs index dbb804e8ed..00d26ec881 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -1844,12 +1844,6 @@ impl ConsoleConfig { mode = ConsoleOutputMode::Tty } else if parser.is_set("null") { mode = ConsoleOutputMode::Null - } else if parser.is_set("file") { - mode = ConsoleOutputMode::File; - file = - Some(PathBuf::from(parser.get("file").ok_or( - Error::Validation(ValidationError::ConsoleFileMissing), - )?)); } else if parser.is_set("tcp") { mode = ConsoleOutputMode::Tcp; url = Some( @@ -1857,6 +1851,18 @@ impl ConsoleConfig { .get("tcp") .ok_or(Error::Validation(ValidationError::ConsoleTcpAddressMissing))?, ); + if parser.is_set("file") { + file = + Some(PathBuf::from(parser.get("file").ok_or( + Error::Validation(ValidationError::ConsoleFileMissing), + )?)); + } + } else if parser.is_set("file") { + mode = ConsoleOutputMode::File; + file = + Some(PathBuf::from(parser.get("file").ok_or( + Error::Validation(ValidationError::ConsoleFileMissing), + )?)); } else if parser.is_set("socket") { mode = ConsoleOutputMode::Socket; socket = Some(PathBuf::from(parser.get("socket").ok_or( diff --git a/vmm/src/console_devices.rs b/vmm/src/console_devices.rs index d1a13f79e0..19ac18e3ef 100644 --- a/vmm/src/console_devices.rs +++ b/vmm/src/console_devices.rs @@ -67,7 +67,7 @@ pub enum ConsoleOutput { Tty(Arc), Null, Socket(Arc), - Tcp(Arc), + Tcp(Arc, Option>), Off, } @@ -278,7 +278,13 @@ pub(crate) fn pre_create_console_devices(vmm: &mut Vmm) -> ConsoleDeviceResult ConsoleOutput::Null, ConsoleOutputMode::Off => ConsoleOutput::Off, diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index ffca459d0f..3f09c6f828 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -2350,7 +2350,7 @@ impl DeviceManager { ConsoleOutput::Socket(_) => { return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice); } - ConsoleOutput::Tcp(_) => { + ConsoleOutput::Tcp(_, _) => { return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice); } ConsoleOutput::Null => Endpoint::Null, @@ -2427,7 +2427,7 @@ impl DeviceManager { | ConsoleOutput::Null | ConsoleOutput::Pty(_) | ConsoleOutput::Socket(_) => None, - ConsoleOutput::Tcp(_) => None, + ConsoleOutput::Tcp(_, _) => None, }; if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) { @@ -2436,7 +2436,7 @@ impl DeviceManager { ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) - | ConsoleOutput::Tcp(_) => { + | ConsoleOutput::Tcp(_, _) => { let serial_manager = SerialManager::new( serial, console_info.serial_main_fd, @@ -2469,7 +2469,7 @@ impl DeviceManager { | ConsoleOutput::Null | ConsoleOutput::Pty(_) | ConsoleOutput::Socket(_) => None, - ConsoleOutput::Tcp(_) => None, + ConsoleOutput::Tcp(_, _) => None, }; if let Some(writer) = debug_console_writer { let _ = self.add_debug_console_device(writer)?; diff --git a/vmm/src/serial_manager.rs b/vmm/src/serial_manager.rs index 5d5b6e601f..0f2f97db5a 100644 --- a/vmm/src/serial_manager.rs +++ b/vmm/src/serial_manager.rs @@ -220,7 +220,7 @@ impl SerialManager { } fd.as_raw_fd() } - 
ConsoleOutput::Tcp(ref fd) => fd.as_raw_fd(), + ConsoleOutput::Tcp(ref fd, _) => fd.as_raw_fd(), _ => return Ok(None), }; @@ -242,7 +242,7 @@ impl SerialManager { ConsoleOutput::Null => EpollDispatch::File, ConsoleOutput::Off => EpollDispatch::File, ConsoleOutput::Socket(_) => EpollDispatch::Socket, - ConsoleOutput::Tcp(_) => EpollDispatch::Tcp, + ConsoleOutput::Tcp(_, _) => EpollDispatch::Tcp, }; epoll::ctl( @@ -422,7 +422,7 @@ impl SerialManager { write_distributor.remove_writer("tcp"); } - let ConsoleOutput::Tcp(ref listener) = in_file else { + let ConsoleOutput::Tcp(ref listener, _) = in_file else { unreachable!(); }; @@ -473,7 +473,7 @@ impl SerialManager { 0 } } - ConsoleOutput::Tcp(_) => { + ConsoleOutput::Tcp(_, _) => { if let Some(mut serial_reader) = reader_tcp.as_ref() { let count = serial_reader From e48a1ca023494f36757adbb3e611aca3fdc83c03 Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Wed, 17 Sep 2025 09:37:46 +0200 Subject: [PATCH 206/294] serial: additional log to file Use the user provided file path that can be additionally specified when TCP mode is selected for serial. Signed-off-by: Stefan Kober --- vmm/src/serial_manager.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vmm/src/serial_manager.rs b/vmm/src/serial_manager.rs index 0f2f97db5a..8fd13be9f1 100644 --- a/vmm/src/serial_manager.rs +++ b/vmm/src/serial_manager.rs @@ -335,6 +335,10 @@ impl SerialManager { std::panic::catch_unwind(AssertUnwindSafe(move || { let write_distributor = FanoutWriter::new(); + if let ConsoleOutput::Tcp(_, Some(f)) = &in_file { + write_distributor.add_writer("file".into(), f.clone()); + } + let mut events = [epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; serial From ecb992e608727df95619b8f135f9b14de36c41be Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Mon, 29 Sep 2025 11:00:25 +0200 Subject: [PATCH 207/294] ch: use TypeId trait for key type in FanoutWriter Signed-off-by: Stefan Kober --- vmm/src/serial_manager.rs | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/vmm/src/serial_manager.rs b/vmm/src/serial_manager.rs index 8fd13be9f1..01c224ec3e 100644 --- a/vmm/src/serial_manager.rs +++ b/vmm/src/serial_manager.rs @@ -3,6 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // +use std::any::TypeId; use std::collections::HashMap; use std::fs::File; use std::io::{Read, Write}; @@ -115,7 +116,7 @@ impl From for EpollDispatch { /// a TCP socket and a file. 
#[derive(Clone)] pub struct FanoutWriter { - writers: Arc>>>, + writers: Arc>>>, } impl FanoutWriter { @@ -125,14 +126,14 @@ impl FanoutWriter { } } - pub fn add_writer(&self, key: String, writer: W) { + pub fn add_writer(&self, writer: W) { let mut writers = self.writers.lock().unwrap(); - writers.insert(key, Box::new(writer)); + writers.insert(TypeId::of::(), Box::new(writer)); } - pub fn remove_writer(&self, key: &str) -> Option> { + pub fn remove_writer(&self, id: TypeId) -> Option> { let mut writers = self.writers.lock().unwrap(); - writers.remove(key) + writers.remove(&id) } } @@ -336,7 +337,7 @@ impl SerialManager { let write_distributor = FanoutWriter::new(); if let ConsoleOutput::Tcp(_, Some(f)) = &in_file { - write_distributor.add_writer("file".into(), f.clone()); + write_distributor.add_writer(f.clone()); } let mut events = @@ -423,7 +424,7 @@ impl SerialManager { previous_reader .shutdown(Shutdown::Both) .map_err(Error::AcceptConnection)?; - write_distributor.remove_writer("tcp"); + write_distributor.remove_writer(TypeId::of::()); } let ConsoleOutput::Tcp(ref listener, _) = in_file else { @@ -449,7 +450,7 @@ impl SerialManager { ), ) .map_err(Error::Epoll)?; - write_distributor.add_writer("tcp".into(), writer); + write_distributor.add_writer(writer); } EpollDispatch::File => { if event.events & libc::EPOLLIN as u32 != 0 { @@ -489,7 +490,9 @@ impl SerialManager { .shutdown(Shutdown::Both) .map_err(Error::ShutdownConnection)?; reader_tcp = None; - write_distributor.remove_writer("tcp"); + write_distributor.remove_writer( + TypeId::of::(), + ); } count } else { From fd140ac83a7214feb9f00097040922c88472e993 Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Tue, 30 Sep 2025 09:00:18 +0200 Subject: [PATCH 208/294] vmm: clippy: fix manual_is_multiple_of Fix the clippy error in auto converge interation increasing. --- vmm/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 2b81a17d0c..1201832fd9 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -1160,7 +1160,7 @@ impl Vmm { false } else { let iteration = s.iteration - AUTO_CONVERGE_ITERATION_DELAY; - iteration % AUTO_CONVERGE_ITERATION_INCREASE == 0 + iteration.is_multiple_of(AUTO_CONVERGE_ITERATION_INCREASE) } } From 1498fee49a565eafd7f0e85427f117ccadd68f45 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 24 Jun 2025 10:31:37 +0200 Subject: [PATCH 209/294] hypervisor: kvm/x86_64: get and set nested guest state Since the functionality of [0] was backported to kvm-bindings@v0.12.1 and kvm-ioctls@v0.22.1 [1, 2], we can now save nested KVM state. This way, nesting works across state save/resume and live-migration. 
[0] https://github.com/rust-vmm/kvm/pull/322 [1] https://github.com/rust-vmm/kvm/pull/349 [2] https://github.com/rust-vmm/kvm/pull/350 Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- Cargo.lock | 8 +++---- Cargo.toml | 4 ++-- hypervisor/src/cpu.rs | 4 ++++ hypervisor/src/kvm/mod.rs | 37 ++++++++++++++++++++++++++++++++ hypervisor/src/kvm/x86_64/mod.rs | 4 ++++ vmm/src/seccomp_filters.rs | 6 ++++++ 6 files changed, 57 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ed015d1ce7..a56f90cad2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1053,9 +1053,9 @@ dependencies = [ [[package]] name = "kvm-bindings" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4b153a59bb3ca930ff8148655b2ef68c34259a623ae08cf2fb9b570b2e45363" +checksum = "9a537873e15e8daabb416667e606d9b0abc2a8fb9a45bd5853b888ae0ead82f9" dependencies = [ "serde", "vmm-sys-util", @@ -1064,9 +1064,9 @@ dependencies = [ [[package]] name = "kvm-ioctls" -version = "0.22.0" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b702df98508cb63ad89dd9beb9f6409761b30edca10d48e57941d3f11513a006" +checksum = "0c8f7370330b4f57981e300fa39b02088f2f2a5c2d0f1f994e8090589619c56d" dependencies = [ "bitflags 2.9.4", "kvm-bindings", diff --git a/Cargo.toml b/Cargo.toml index ed4c5225f8..1ce6d53ba3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -121,8 +121,8 @@ package.edition = "2024" [workspace.dependencies] # rust-vmm crates acpi_tables = { git = "https://github.com/rust-vmm/acpi_tables", branch = "main" } -kvm-bindings = "0.12.0" -kvm-ioctls = "0.22.0" +kvm-bindings = "0.12.1" +kvm-ioctls = "0.22.1" # TODO: update to 0.13.1+ linux-loader = { git = "https://github.com/rust-vmm/linux-loader", branch = "main" } mshv-bindings = "0.6.0" diff --git a/hypervisor/src/cpu.rs b/hypervisor/src/cpu.rs index e138cb745b..519b99f567 100644 --- a/hypervisor/src/cpu.rs +++ b/hypervisor/src/cpu.rs @@ -336,6 +336,10 @@ pub enum HypervisorCpuError { /// #[error("Failed to inject NMI")] Nmi(#[source] anyhow::Error), + #[error("Failed to get nested guest state")] + GetNestedState(#[source] anyhow::Error), + #[error("Failed to set nested guest state")] + SetNestedState(#[source] anyhow::Error), } #[derive(Debug)] diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 961b62810a..8d1bc228f1 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -84,6 +84,8 @@ use std::mem; /// #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub use kvm_bindings::kvm_vcpu_events as VcpuEvents; +#[cfg(target_arch = "x86_64")] +use kvm_bindings::nested::KvmNestedStateBuffer; pub use kvm_bindings::{ KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID, kvm_clock_data, @@ -2468,6 +2470,7 @@ impl cpu::Vcpu for KvmVcpu { let xcrs = self.get_xcrs()?; let lapic_state = self.get_lapic()?; let fpu = self.get_fpu()?; + let nested_state = self.nested_state()?; // Try to get all MSRs based on the list previously retrieved from KVM. 
// If the number of MSRs obtained from GET_MSRS is different from the @@ -2542,6 +2545,7 @@ impl cpu::Vcpu for KvmVcpu { xcrs, mp_state, tsc_khz, + nested_state, } .into()) } @@ -2708,6 +2712,9 @@ impl cpu::Vcpu for KvmVcpu { self.set_xcrs(&state.xcrs)?; self.set_lapic(&state.lapic_state)?; self.set_fpu(&state.fpu)?; + if let Some(nested_state) = state.nested_state { + self.set_nested_state(&nested_state)?; + } if let Some(freq) = state.tsc_khz { self.set_tsc_khz(freq)?; @@ -3068,6 +3075,36 @@ impl KvmVcpu { .set_vcpu_events(events) .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into())) } + + /// Get the state of the nested guest from the current vCPU, + /// if there is any. + #[cfg(target_arch = "x86_64")] + fn nested_state(&self) -> cpu::Result> { + let mut buffer = KvmNestedStateBuffer::empty(); + + let maybe_size = self + .fd + .lock() + .unwrap() + .get_nested_state(&mut buffer) + .map_err(|e| cpu::HypervisorCpuError::GetNestedState(e.into()))?; + + if let Some(_size) = maybe_size { + Ok(Some(buffer)) + } else { + Ok(None) + } + } + + /// Sets the state of the nested guest for the current vCPU. + #[cfg(target_arch = "x86_64")] + fn set_nested_state(&self, state: &KvmNestedStateBuffer) -> cpu::Result<()> { + self.fd + .lock() + .unwrap() + .set_nested_state(state) + .map_err(|e| cpu::HypervisorCpuError::GetNestedState(e.into())) + } } #[cfg(test)] diff --git a/hypervisor/src/kvm/x86_64/mod.rs b/hypervisor/src/kvm/x86_64/mod.rs index a01fb9d727..c1bda9d9be 100644 --- a/hypervisor/src/kvm/x86_64/mod.rs +++ b/hypervisor/src/kvm/x86_64/mod.rs @@ -19,6 +19,7 @@ pub use { kvm_bindings::kvm_msr_entry, kvm_bindings::kvm_regs, kvm_bindings::kvm_segment, kvm_bindings::kvm_sregs, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_bindings::kvm_xcrs as ExtendedControlRegisters, kvm_bindings::kvm_xsave, + kvm_bindings::nested::KvmNestedStateBuffer, }; use crate::arch::x86::{ @@ -75,6 +76,9 @@ pub struct VcpuKvmState { pub xcrs: ExtendedControlRegisters, pub mp_state: MpState, pub tsc_khz: Option, + // Option to prevent useless 8K (de)serialization when no nested + // state exists. + pub nested_state: Option, } impl From for kvm_segment { diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index b3175ffe15..2d7d3ca11e 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -103,6 +103,8 @@ mod kvm { pub const KVM_GET_REG_LIST: u64 = 0xc008_aeb0; pub const KVM_MEMORY_ENCRYPT_OP: u64 = 0xc008_aeba; pub const KVM_NMI: u64 = 0xae9a; + pub const KVM_GET_NESTED_STATE: u64 = 3229658814; + pub const KVM_SET_NESTED_STATE: u64 = 1082175167; } // MSHV IOCTL code. This is unstable until the kernel code has been declared stable. 
@@ -232,6 +234,8 @@ fn create_vmm_ioctl_seccomp_rule_common_kvm() -> Result, Backen and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_USER_MEMORY_REGION,)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_VCPU_EVENTS,)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_NMI)?], + and![Cond::new(1, ArgLen::Dword, Eq, KVM_GET_NESTED_STATE)?], + and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_NESTED_STATE)?], ]) } @@ -697,6 +701,8 @@ fn create_vcpu_ioctl_seccomp_rule_kvm() -> Result, BackendError and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_USER_MEMORY_REGION,)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_RUN,)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_NMI)?], + and![Cond::new(1, ArgLen::Dword, Eq, KVM_GET_NESTED_STATE)?], + and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_NESTED_STATE)?], ]) } From 7163bfe19aff84d52132970a0e84e51f54f919f7 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 21 Oct 2025 10:02:09 +0200 Subject: [PATCH 210/294] block: advisory locks: use byte-range locks to match QEMU behavior The granularity has significant implications in typical cloud deployments with network storage. The Linux kernel will sync advisory locks to network file systems, but these backends may have different policies and handle locks differently. For example, Netapp speaks a NFS API but will treat advisory OFD locks for the whole file as mandatory locks, whereas byte-range locks for the whole file will remain advisory [0]. As it is a valid use case to prevent multiple CHV instances from accessing the same disk but disk management software (e.g., Cinder in OpenStack) should be able to snapshot disks while VMs are running, we need special control over the lock granularity. Therefore, it is a valid use case to lock the whole byte range of a disk image without technically locking the whole file - to get the best of both worlds. This also brings CHVs behavior in line with QEMU [1]. Whole-file locks remain a valid use case and could be supported later. This patch only provides the necessary groundwork; making it configurable is out of scope for now. [0] https://kb.netapp.com/on-prem/ontap/da/NAS/NAS-KBs/How_is_Mandatory_Locking_supported_for_NFSv4_on_ONTAP_9 [1] /util/osdep.c::qemu_lock_fcntl() Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- block/src/fcntl.rs | 67 ++++++++++++++++++++++++++++++++----- virtio-devices/src/block.rs | 34 ++++++++++++++++--- 2 files changed, 87 insertions(+), 14 deletions(-) diff --git a/block/src/fcntl.rs b/block/src/fcntl.rs index 2e34de1d6a..3687288a6b 100644 --- a/block/src/fcntl.rs +++ b/block/src/fcntl.rs @@ -101,13 +101,52 @@ impl LockState { } } +/// The granularity of the advisory lock. +/// +/// The granularity has significant implications in typical cloud deployments +/// with network storage. The Linux kernel will sync advisory locks to network +/// file systems, but these backends may have different policies and handle +/// locks differently. For example, Netapp speaks a NFS API but will treat +/// advisory OFD locks for the whole file as mandatory locks, whereas byte-range +/// locks for the whole file will remain advisory [0]. +/// +/// As it is a valid use case to prevent multiple CHV instances from accessing +/// the same disk but disk management software (e.g., Cinder in OpenStack) +/// should be able to snapshot disks while VMs are running, we need special +/// control over the lock granularity. 
Therefore, it is a valid use case to lock +/// the whole byte range of a disk image without technically locking the whole +/// file - to get the best of both worlds. +/// +/// [0] https://kb.netapp.com/on-prem/ontap/da/NAS/NAS-KBs/How_is_Mandatory_Locking_supported_for_NFSv4_on_ONTAP_9 +#[derive(Clone, Copy, Debug)] +pub enum LockGranularity { + WholeFile, + ByteRange(u64 /* from, inclusive */, u64 /* len */), +} + +impl LockGranularity { + const fn l_start(self) -> u64 { + match self { + LockGranularity::WholeFile => 0, + LockGranularity::ByteRange(start, _) => start, + } + } + + const fn l_len(self) -> u64 { + match self { + LockGranularity::WholeFile => 0, /* EOF */ + LockGranularity::ByteRange(_, len) => len, + } + } +} + /// Returns a [`struct@libc::flock`] structure for the whole file. -const fn get_flock(lock_type: LockType) -> libc::flock { +const fn get_flock(lock_type: LockType, granularity: LockGranularity) -> libc::flock { libc::flock { l_type: lock_type.to_libc_val() as libc::c_short, l_whence: libc::SEEK_SET as libc::c_short, - l_start: 0, - l_len: 0, /* EOF */ + l_start: granularity.l_start() as libc::c_long, + l_len: granularity.l_len() as libc::c_long, l_pid: 0, /* filled by callee */ } } @@ -122,8 +161,13 @@ const fn get_flock(lock_type: LockType) -> libc::flock { /// - `file`: The file to acquire a lock for [`LockType`]. The file's state will /// be logically mutated, but not technically. /// - `lock_type`: The [`LockType`] -pub fn try_acquire_lock(file: Fd, lock_type: LockType) -> Result<(), LockError> { - let flock = get_flock(lock_type); +/// - `granularity`: The [`LockGranularity`]. +pub fn try_acquire_lock( + file: Fd, + lock_type: LockType, + granularity: LockGranularity, +) -> Result<(), LockError> { + let flock = get_flock(lock_type, granularity); let res = fcntl(file.as_raw_fd(), FcntlArg::F_OFD_SETLK(&flock)); match res { @@ -146,8 +190,9 @@ pub fn try_acquire_lock(file: Fd, lock_type: LockType) -> Result<() /// /// # Parameters /// - `file`: The file to clear all locks for [`LockType`]. -pub fn clear_lock(file: Fd) -> Result<(), LockError> { - try_acquire_lock(file, LockType::Unlock) +/// - `granularity`: The [`LockGranularity`]. +pub fn clear_lock(file: Fd, granularity: LockGranularity) -> Result<(), LockError> { + try_acquire_lock(file, LockType::Unlock, granularity) } /// Returns the current lock state using [`fcntl`] with respect to the given @@ -155,8 +200,12 @@ pub fn clear_lock(file: Fd) -> Result<(), LockError> { /// /// # Parameters /// - `file`: The file for which to get the lock state. -pub fn get_lock_state(file: Fd) -> Result { - let mut flock = get_flock(LockType::Write); +/// - `granularity`: The [`LockGranularity`]. 
+pub fn get_lock_state( + file: Fd, + granularity: LockGranularity, +) -> Result { + let mut flock = get_flock(LockType::Write, granularity); let res = fcntl(file.as_raw_fd(), FcntlArg::F_OFD_GETLK(&mut flock)); match res { 0 => { diff --git a/virtio-devices/src/block.rs b/virtio-devices/src/block.rs index dad4fd2158..fa16bf86c2 100644 --- a/virtio-devices/src/block.rs +++ b/virtio-devices/src/block.rs @@ -19,7 +19,7 @@ use std::{io, result}; use anyhow::anyhow; use block::async_io::{AsyncIo, AsyncIoError, DiskFile}; -use block::fcntl::{LockError, LockType, get_lock_state}; +use block::fcntl::{LockError, LockGranularity, LockType, get_lock_state}; use block::{ ExecuteAsync, ExecuteError, Request, RequestType, VirtioBlockConfig, build_serial, fcntl, }; @@ -778,20 +778,42 @@ impl Block { has_feature(self.features(), VIRTIO_BLK_F_RO.into()) } + /// Returns the granularity for the advisory lock for this disk. + // TODO In future, we could add a `lock_granularity=` configuration to the CLI. + // For now, we stick to QEMU behavior. + fn lock_granularity(&mut self) -> LockGranularity { + let fallback = LockGranularity::WholeFile; + + self.disk_image + .size() + .map(|size| LockGranularity::ByteRange(0, size)) + // use a safe fallback + .unwrap_or_else(|e| { + log::warn!( + "Can't get disk size for id={},path={}, falling back to {:?}: error: {e}", + self.id, + self.disk_path.display(), + fallback + ); + fallback + }) + } + /// Tries to set an advisory lock for the corresponding disk image. pub fn try_lock_image(&mut self) -> Result<()> { let lock_type = match self.read_only() { true => LockType::Read, false => LockType::Write, }; + let granularity = self.lock_granularity(); log::debug!( - "Attempting to acquire {lock_type:?} lock for disk image id={},path={}", + "Attempting to acquire {lock_type:?} lock for disk image: id={},path={},granularity={granularity:?}", self.id, self.disk_path.display() ); let fd = self.disk_image.fd(); - fcntl::try_acquire_lock(fd, lock_type).map_err(|error| { - let current_lock = get_lock_state(fd); + fcntl::try_acquire_lock(fd, lock_type, granularity).map_err(|error| { + let current_lock = get_lock_state(fd, granularity); // Don't propagate the error to the outside, as it is not useful at all. Instead, // we try to log additional help to the user. if let Ok(current_lock) = current_lock { @@ -815,10 +837,12 @@ impl Block { /// Releases the advisory lock held for the corresponding disk image. pub fn unlock_image(&mut self) -> Result<()> { + let granularity = self.lock_granularity(); + // It is very unlikely that this fails; // Should we remove the Result to simplify the error propagation on // higher levels? - fcntl::clear_lock(self.disk_image.fd()).map_err(|error| Error::LockDiskImage { + fcntl::clear_lock(self.disk_image.fd(), granularity).map_err(|error| Error::LockDiskImage { path: self.disk_path.clone(), error, lock_type: LockType::Unlock, From 169a071c9999bbd94ae968d324c27b168c71457e Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Tue, 21 Oct 2025 15:11:07 +0200 Subject: [PATCH 211/294] api: receive_migration can get a tcp_serial_url The TCP serial mode utilizes a port on the host and needs to listen on a specific IP. If that IP or port are not available on the receiver host when migrating, we need to be able to set a new configuration for the destination host. As a shortcut, we add a tcp_serial_url parameter to the receive migration API call. The caller can specify a new value, that will lead to an update of the VM config on the receiver side. 
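For illustration, a caller could populate the new field as follows (a
minimal sketch; the receiver URL shown is an assumption and must match
whatever the destination VMM listens on, while the serial address format
follows the example documented on the field):

    let data = vmm::api::VmReceiveMigrationData {
        // Assumed migration listener address on the destination host.
        receiver_url: "unix:/tmp/migration.sock".to_owned(),
        // Rebind the TCP serial on the destination host.
        tcp_serial_url: Some("192.168.1.1:2222".to_owned()),
        net_fds: None,
    };

If tcp_serial_url is None, the serial configuration received from the
source VM config is used unchanged.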
On-behalf-of: SAP stefan.kober@sap.com Signed-off-by: Stefan Kober --- src/bin/ch-remote.rs | 1 + vmm/src/api/mod.rs | 3 +++ vmm/src/lib.rs | 15 +++++++++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs index 61e30c33d3..2614855c45 100644 --- a/src/bin/ch-remote.rs +++ b/src/bin/ch-remote.rs @@ -897,6 +897,7 @@ fn coredump_config(destination_url: &str) -> String { fn receive_migration_data(url: &str) -> String { let receive_migration_data = vmm::api::VmReceiveMigrationData { receiver_url: url.to_owned(), + tcp_serial_url: None, // Only FDs transmitted via an SCM_RIGHTS UNIX Domain Socket message // are valid. Transmitting specific FD nums via the HTTP API is // almost always invalid. diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index 9513088dff..3c45f57d2e 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -249,6 +249,9 @@ pub struct VmCoredumpData { pub struct VmReceiveMigrationData { /// URL for the reception of migration state pub receiver_url: String, + /// Optional URL if the TCP serial configuration must be changed during + /// migration. Example: "192.168.1.1:2222". + pub tcp_serial_url: Option, /// Map with new network FDs on the new host. pub net_fds: Option>, } diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 1201832fd9..51b866ce21 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -884,6 +884,7 @@ impl Vmm { req: &Request, socket: &mut T, existing_memory_files: Option>, + tcp_serial_url: Option, ) -> std::result::Result>, MigratableError> where T: Read + Write, @@ -908,6 +909,12 @@ impl Vmm { let config = vm_migration_config.vm_config.clone(); self.vm_config = Some(vm_migration_config.vm_config); + + if let Some(tcp_serial_url) = tcp_serial_url { + let mut vm_config = self.vm_config.as_mut().unwrap().lock().unwrap(); + vm_config.serial.url = Some(tcp_serial_url); + } + self.console_info = Some(pre_create_console_devices(self).map_err(|e| { MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e)) })?); @@ -2391,8 +2398,12 @@ impl RequestHandler for Vmm { continue; } - let memory_manager_config = - self.vm_receive_config(&req, &mut socket, existing_memory_files.take())?; + let memory_manager_config = self.vm_receive_config( + &req, + &mut socket, + existing_memory_files.take(), + receive_data_migration.tcp_serial_url.clone(), + )?; memory_manager = Some(memory_manager_config); if let Some(ref restored_net_configs) = receive_data_migration.net_fds { From c343b198ebebc4f215a32d67a3215037841cdaf3 Mon Sep 17 00:00:00 2001 From: Stefan Kober Date: Fri, 24 Oct 2025 14:52:51 +0200 Subject: [PATCH 212/294] serial: add write_distributor only in case of TCP As we use the write_distributor only if TCP is used, we only add it to the serial out in that case. Not doing so results in e.g. the console not working in the PTY case, because we would overwrite the out member of the serial that was already set before. 
On-behalf-of: SAP stefan.kober@sap.com Signed-off-by: Stefan Kober --- vmm/src/serial_manager.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vmm/src/serial_manager.rs b/vmm/src/serial_manager.rs index 01c224ec3e..fa37ab665a 100644 --- a/vmm/src/serial_manager.rs +++ b/vmm/src/serial_manager.rs @@ -338,15 +338,15 @@ impl SerialManager { if let ConsoleOutput::Tcp(_, Some(f)) = &in_file { write_distributor.add_writer(f.clone()); + serial + .as_ref() + .lock() + .unwrap() + .set_out(Some(Box::new(write_distributor.clone()))); } let mut events = [epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; - serial - .as_ref() - .lock() - .unwrap() - .set_out(Some(Box::new(write_distributor.clone()))); loop { let num_events = match epoll::wait(epoll_fd, timeout, &mut events[..]) { From 06e95b0517f834511a2d0b567e4e4f04e5809a3b Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Wed, 17 Sep 2025 00:01:47 +0000 Subject: [PATCH 213/294] misc: Fix beta clippy issues Fixed with 'cargo clippy --fix' mostly. Signed-off-by: Bo Chen --- src/main.rs | 10 ++++------ test_infra/src/lib.rs | 8 ++------ vm-migration/src/protocol.rs | 18 ++++-------------- 3 files changed, 10 insertions(+), 26 deletions(-) diff --git a/src/main.rs b/src/main.rs index 478ad4aa9c..d096f28cb1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1092,8 +1092,7 @@ mod unit_tests { #[test] fn test_valid_vm_config_memory() { - vec![ - ( + [( vec!["cloud-hypervisor", "--kernel", "/path/to/kernel", "--memory", "size=1073741824"], r#"{ "payload": {"kernel": "/path/to/kernel"}, @@ -1148,8 +1147,7 @@ mod unit_tests { "memory": {"size": 1073741824, "hotplug_method": "VirtioMem", "hotplug_size": 1073741824} }"#, true, - ), - ] + )] .iter() .for_each(|(cli, openapi, equal)| { compare_vm_config_cli_vs_json(cli, openapi, *equal); @@ -1300,7 +1298,7 @@ mod unit_tests { #[test] fn test_valid_vm_config_net() { - vec![ + [ // This test is expected to fail because the default MAC address is // randomly generated. There's no way we can have twice the same // default value. 
@@ -1778,7 +1776,7 @@ mod unit_tests { #[test] #[cfg(target_arch = "x86_64")] fn test_valid_vm_config_devices() { - vec![ + [ ( vec![ "cloud-hypervisor", diff --git a/test_infra/src/lib.rs b/test_infra/src/lib.rs index 7dceaecce1..8b260a0a63 100644 --- a/test_infra/src/lib.rs +++ b/test_infra/src/lib.rs @@ -1229,18 +1229,14 @@ impl Guest { } } +#[derive(Default)] pub enum VerbosityLevel { + #[default] Warn, Info, Debug, } -impl Default for VerbosityLevel { - fn default() -> Self { - Self::Warn - } -} - impl Display for VerbosityLevel { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use VerbosityLevel::*; diff --git a/vm-migration/src/protocol.rs b/vm-migration/src/protocol.rs index 094a8c1a30..2ed782ae39 100644 --- a/vm-migration/src/protocol.rs +++ b/vm-migration/src/protocol.rs @@ -50,8 +50,9 @@ use crate::MigratableError; // The source can at any time send an "abandon request" to cancel #[repr(u16)] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Default)] pub enum Command { + #[default] Invalid, Start, Config, @@ -62,12 +63,6 @@ pub enum Command { MemoryFd, } -impl Default for Command { - fn default() -> Self { - Self::Invalid - } -} - #[repr(C)] #[derive(Default, Copy, Clone)] pub struct Request { @@ -139,19 +134,14 @@ impl Request { } #[repr(u16)] -#[derive(Copy, Clone, PartialEq, Eq)] +#[derive(Copy, Clone, PartialEq, Eq, Default)] pub enum Status { + #[default] Invalid, Ok, Error, } -impl Default for Status { - fn default() -> Self { - Self::Invalid - } -} - #[repr(C)] #[derive(Default, Copy, Clone)] pub struct Response { From 079b64627d2b9c0de63f2492ed559348157f4fe9 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Thu, 30 Oct 2025 14:34:25 +0100 Subject: [PATCH 214/294] ci: enforce SAP commit style To check gitlint locally, one can run: gitlint --commits "HEAD~2..HEAD" which for example checks the last two commits. Although this is just our kinda private (but public) fork, people might cherry-pick commits from us for whatever reason. So we should have proper commit style. On-behalf-of: SAP philipp.schuster@sap.com --- .github/workflows/commit-lint.yml | 23 +++++++++++++ scripts/gitlint/rules/on-behalf-of-marker.py | 36 ++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 .github/workflows/commit-lint.yml create mode 100644 scripts/gitlint/rules/on-behalf-of-marker.py diff --git a/.github/workflows/commit-lint.yml b/.github/workflows/commit-lint.yml new file mode 100644 index 0000000000..ec2dfec7ac --- /dev/null +++ b/.github/workflows/commit-lint.yml @@ -0,0 +1,23 @@ +name: Commit Lint +on: [ pull_request ] +jobs: + gitlint: + name: Check commit messages + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install --upgrade gitlint + - name: Lint git commit messages + run: | + gitlint --commits origin/$GITHUB_BASE_REF.. 
diff --git a/scripts/gitlint/rules/on-behalf-of-marker.py b/scripts/gitlint/rules/on-behalf-of-marker.py new file mode 100644 index 0000000000..d08e334b17 --- /dev/null +++ b/scripts/gitlint/rules/on-behalf-of-marker.py @@ -0,0 +1,36 @@ +from gitlint.rules import LineRule, RuleViolation, CommitMessageTitle, CommitRule + +class BodyContainsOnBehalfOfSAPMarker(CommitRule): + """Enforce that each commit coming from an SAP contractor contains an + "On-behalf-of SAP user@sap.com" marker. + """ + + # A rule MUST have a human friendly name + name = "body-requires-on-behalf-of-sap" + + # A rule MUST have a *unique* id + # We recommend starting with UC (for User-defined Commit-rule). + id = "UC-sap" + + # Lower-case list of contractors + contractors = [ + "@cyberus-technology.de" + ] + + # Marker followed by " name.surname@sap.com" + marker = "On-behalf-of: SAP" + + def validate(self, commit): + if "@sap.com" in commit.author_email.lower(): + return + + # Allow third-party open-source contributions + if not any(contractor in commit.author_email.lower() for contractor in self.contractors): + return + + for line in commit.message.body: + if line.startswith(self.marker) and "@sap.com" in line.lower(): + return + + msg = f"Body does not contain a '{self.marker} user@sap.com' line" + return [RuleViolation(self.id, msg, line_nr=1)] From 803acd5822b51b457b7016f1e0ffe19c49a445f1 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Thu, 30 Oct 2025 15:21:43 +0100 Subject: [PATCH 215/294] ci: skip more unneeded CI overwhelming our runners Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- .github/workflows/build.yaml | 5 ---- .github/workflows/preview-riscv64-build.yaml | 30 -------------------- 2 files changed, 35 deletions(-) delete mode 100644 .github/workflows/preview-riscv64-build.yaml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 286c2af548..5984430b89 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -13,21 +13,16 @@ jobs: matrix: rust: - stable - - beta - nightly - "1.88.0" target: - x86_64-unknown-linux-gnu - - x86_64-unknown-linux-musl steps: - name: Code checkout uses: actions/checkout@v5 with: fetch-depth: 0 - - name: Install musl-gcc - run: sudo apt install -y musl-tools - - name: Install Rust toolchain (${{ matrix.rust }}) uses: dtolnay/rust-toolchain@stable with: diff --git a/.github/workflows/preview-riscv64-build.yaml b/.github/workflows/preview-riscv64-build.yaml deleted file mode 100644 index 929a60147a..0000000000 --- a/.github/workflows/preview-riscv64-build.yaml +++ /dev/null @@ -1,30 +0,0 @@ -name: Cloud Hypervisor RISC-V 64-bit kvm build Preview -on: [pull_request, merge_group] -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - build: - name: Cargo - runs-on: riscv64-qemu-host - strategy: - fail-fast: false - - steps: - - name: Code checkout - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Install Rust toolchain - run: /opt/scripts/exec-in-qemu.sh rustup default 1.88.0 - - - name: Build test (kvm) - run: /opt/scripts/exec-in-qemu.sh cargo rustc --locked --no-default-features --features "kvm" - - - name: Clippy test (kvm) - run: /opt/scripts/exec-in-qemu.sh cargo clippy --locked --no-default-features --features "kvm" - - - name: Check no files were modified - run: test -z "$(git status --porcelain)" From fddea8b0c775cdb102f479e9af97819ac4f7e706 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Mon, 3 Nov 2025 10:54:49 +0100 
Subject: [PATCH 216/294] ci: enable more of normal CHV CI in our GitHub We skip the integration-tests as they will not run here anyway. Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- .github/workflows/integration-x86-64.yaml | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/.github/workflows/integration-x86-64.yaml b/.github/workflows/integration-x86-64.yaml index 8ed76f16a1..b5b8ec9071 100644 --- a/.github/workflows/integration-x86-64.yaml +++ b/.github/workflows/integration-x86-64.yaml @@ -10,8 +10,7 @@ jobs: strategy: fail-fast: false matrix: - runner: ['garm-jammy', "garm-jammy-amd"] - libc: ["musl", 'gnu'] + libc: ['gnu'] name: Tests (x86-64) runs-on: ${{ github.event_name == 'pull_request' && !(matrix.runner == 'garm-jammy' && matrix.libc == 'gnu') && 'ubuntu-latest' || format('{0}-16', matrix.runner) }} steps: @@ -36,17 +35,6 @@ jobs: - name: Run unit tests if: ${{ github.event_name != 'pull_request' || (matrix.runner == 'garm-jammy' && matrix.libc == 'gnu') }} run: scripts/dev_cli.sh tests --unit --libc ${{ matrix.libc }} - - name: Load openvswitch module - if: ${{ github.event_name != 'pull_request' || (matrix.runner == 'garm-jammy' && matrix.libc == 'gnu') }} - run: sudo modprobe openvswitch - - name: Run integration tests - if: ${{ github.event_name != 'pull_request' || (matrix.runner == 'garm-jammy' && matrix.libc == 'gnu') }} - timeout-minutes: 40 - run: scripts/dev_cli.sh tests --integration --libc ${{ matrix.libc }} - - name: Run live-migration integration tests - if: ${{ github.event_name != 'pull_request' || (matrix.runner == 'garm-jammy' && matrix.libc == 'gnu') }} - timeout-minutes: 20 - run: scripts/dev_cli.sh tests --integration-live-migration --libc ${{ matrix.libc }} - name: Skipping build for PR if: ${{ github.event_name == 'pull_request' && matrix.runner != 'garm-jammy' && matrix.libc != 'gnu' }} run: echo "Skipping build for PR" From 704a7cd1156d7a1cb0f37a576ce62552d8a6d018 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Thu, 30 Oct 2025 14:35:57 +0100 Subject: [PATCH 217/294] misc: fix typos On-behalf-of: SAP philipp.schuster@sap.com --- tests/integration.rs | 2 +- vmm/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration.rs b/tests/integration.rs index 8369560799..5ad03d0b2c 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -10982,7 +10982,7 @@ mod live_migration { ); }; - // Post live-migration check to make sure the destination VM is funcational + // Post live-migration check to make sure the destination VM is functional let r = std::panic::catch_unwind(|| { // Perform same checks to validate VM has been properly migrated assert_eq!(guest.get_cpu_count().unwrap_or_default(), boot_vcpus); diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 51b866ce21..dd652e9137 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -2407,7 +2407,7 @@ impl RequestHandler for Vmm { memory_manager = Some(memory_manager_config); if let Some(ref restored_net_configs) = receive_data_migration.net_fds { - // TODO do some validaiton + // TODO do some validation //restored_net_config.validate(); // Update VM's net configurations with new fds received for restore operation From dfab5c7c6b3c25699ee8eee5eafbced99a8dcf53 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Wed, 29 Oct 2025 14:32:22 +0100 Subject: [PATCH 218/294] vmm: don't needlessly clone strings On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index dd652e9137..00c18e2734 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -1853,7 +1853,7 @@ impl RequestHandler for Vmm { for net in restored_nets.iter() { for net_config in vm_net_configs.iter_mut() { // update only if the net dev is backed by FDs - if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() { + if net_config.id.as_ref() == Some(&net.id) && net_config.fds.is_some() { net_config.fds.clone_from(&net.fds); } } From 82bdc7fd0a30a78afc99e058da0790c72e3ed06d Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Thu, 16 Oct 2025 15:01:00 +0200 Subject: [PATCH 219/294] vmm: allow comparing commands and extracting response length This will be useful later when we rebuild the live migration code as a state machine. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vm-migration/src/protocol.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vm-migration/src/protocol.rs b/vm-migration/src/protocol.rs index 2ed782ae39..26ce00bf75 100644 --- a/vm-migration/src/protocol.rs +++ b/vm-migration/src/protocol.rs @@ -50,7 +50,7 @@ use crate::MigratableError; // The source can at any time send an "abandon request" to cancel #[repr(u16)] -#[derive(Copy, Clone, Default)] +#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] pub enum Command { #[default] Invalid, @@ -174,6 +174,10 @@ impl Response { self.status } + pub fn length(&self) -> u64 { + self.length + } + pub fn read_from(fd: &mut dyn Read) -> Result { let mut response = Response::default(); fd.read_exact(Self::as_mut_slice(&mut response)) From a0fa4c33f3fbcab23e0d3f4431c51e9b0dee58c0 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Mon, 3 Nov 2025 16:27:12 +0100 Subject: [PATCH 220/294] vmm: simplify receiving memory fds ... and nuke some Option<> while I was there. Given that HashMap has a usable default and we end up passing an empty HashMap anyway, just get rid of the Option. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 55 +++++++++++++++++---------------------- vmm/src/memory_manager.rs | 6 ++--- vmm/src/vm.rs | 2 +- 3 files changed, 28 insertions(+), 35 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 00c18e2734..7c38b89e5e 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -879,11 +879,30 @@ impl Vmm { }) } + /// Try to receive a file descriptor from a socket. Returns the slot number and the file descriptor. 
+ fn vm_receive_memory_fd( + socket: &mut SocketStream, + ) -> std::result::Result<(u32, File), MigratableError> { + if let SocketStream::Unix(unix_socket) = socket { + let mut buf = [0u8; 4]; + let (_, file) = unix_socket.recv_with_fd(&mut buf).map_err(|e| { + MigratableError::MigrateReceive(anyhow!("Error receiving slot from socket: {}", e)) + })?; + + file.ok_or_else(|| MigratableError::MigrateReceive(anyhow!("Failed to receive socket"))) + .map(|file| (u32::from_le_bytes(buf), file)) + } else { + Err(MigratableError::MigrateReceive(anyhow!( + "Unsupported socket type" + ))) + } + } + fn vm_receive_config( &mut self, req: &Request, socket: &mut T, - existing_memory_files: Option>, + existing_memory_files: HashMap, tcp_serial_url: Option, ) -> std::result::Result>, MigratableError> where @@ -2378,7 +2397,7 @@ impl RequestHandler for Vmm { let mut started = false; let mut memory_manager: Option>> = None; - let mut existing_memory_files = None; + let mut existing_memory_files = vec![]; loop { let req = Request::read_from(&mut socket)?; match req.command() { @@ -2401,7 +2420,7 @@ impl RequestHandler for Vmm { let memory_manager_config = self.vm_receive_config( &req, &mut socket, - existing_memory_files.take(), + HashMap::from_iter(existing_memory_files.drain(..)), receive_data_migration.tcp_serial_url.clone(), )?; memory_manager = Some(memory_manager_config); @@ -2471,35 +2490,9 @@ impl RequestHandler for Vmm { continue; } - match &mut socket { - SocketStream::Unix(unix_socket) => { - let mut buf = [0u8; 4]; - let (_, file) = unix_socket.recv_with_fd(&mut buf).map_err(|e| { - MigratableError::MigrateReceive(anyhow!( - "Error receiving slot from socket: {}", - e - )) - })?; - - if existing_memory_files.is_none() { - existing_memory_files = Some(HashMap::default()) - } + existing_memory_files.push(Self::vm_receive_memory_fd(&mut socket)?); - if let Some(ref mut existing_memory_files) = existing_memory_files { - let slot = u32::from_le_bytes(buf); - existing_memory_files.insert(slot, file.unwrap()); - } - - Response::ok().write_to(&mut socket)?; - } - SocketStream::Tcp(_tcp_socket) => { - // For TCP sockets, we cannot transfer file descriptors - warn!( - "MemoryFd command received over TCP socket, which is not supported" - ); - Response::error().write_to(&mut socket)?; - } - } + Response::ok().write_to(&mut socket)?; } Command::Complete => { info!("Complete Command Received"); diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index ff040a043e..c1a7b4159b 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -994,7 +994,7 @@ impl MemoryManager { phys_bits: u8, #[cfg(feature = "tdx")] tdx_enabled: bool, restore_data: Option<&MemoryManagerSnapshotData>, - existing_memory_files: Option>, + existing_memory_files: HashMap, ) -> Result>, Error> { trace_scoped!("MemoryManager::new"); @@ -1029,7 +1029,7 @@ impl MemoryManager { &data.guest_ram_mappings, &zones, prefault, - existing_memory_files.unwrap_or_default(), + existing_memory_files, config.thp, )?; let guest_memory = @@ -1262,7 +1262,7 @@ impl MemoryManager { #[cfg(feature = "tdx")] false, Some(&mem_snapshot), - None, + Default::default(), )?; mm.lock() diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 58fbae6f5a..e824a729c5 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -1084,7 +1084,7 @@ impl Vm { #[cfg(feature = "tdx")] tdx_enabled, None, - None, + Default::default(), ) .map_err(Error::MemoryManager)? 
}; From 1fa6a03d2e1e81f917b9ca7e8ba4297d3c7b52e7 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Thu, 9 Oct 2025 15:48:31 +0200 Subject: [PATCH 221/294] vmm: refactor live migration receive into state machine Previously, state that we accumulated during the migration process in the receiver was kept in `mut Option` variables or HashMaps. The problem is that it is unclear in the code when these variables can be safely used. It's also difficult to add new state, such as the state we need to handle the upcoming feature for performing the migration using multiple connections. To solve this, I've refactored the code to use the state machine pattern. Each state carries the data it needs to. Any state that didn't arrive yet (memory_files, memory_manager) cannot be accessed until we are in the proper state. Some benefits that fall out of this: - We now respond to all requests, even invalid ones, at least with an error message. - Any error handling a request will result in an error message being sent. - There is only a single place where responses are sent and thus it's very hard to mess this up in the code. - The main protocol state machine fits on a screen. I would argue that especially the error cases are now much more consistent. There is still a lot to be done. There is still state transfer via self.vm and similar. In an ideal world, this would also be carried by the state machine. I also want to see better handling of payloads, which are still handled all over the place, but this change is already too big. :) On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 300 ++++++++++++++++++++++++++++--------------------- 1 file changed, 172 insertions(+), 128 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 7c38b89e5e..ed4c8b36f3 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -727,6 +727,39 @@ pub struct Vmm { console_info: Option, } +/// The receiver's state machine behind the migration protocol. +enum ReceiveMigrationState { + /// The connection is established and we haven't received any commands yet. + Established, + + /// We received the start command. + Started, + + /// We received file descriptors for memory. This can only happen on UNIX domain sockets. + MemoryFdsReceived(Vec<(u32, File)>), + + /// We received the VM configuration. We keep the memory configuration around to populate guest memory. From this point on, the sender can start sending memory updates. + Configured(Arc>), + + /// Memory is populated and we received the state. The VM is ready to go. + StateReceived, + + /// The migration is successful. + Completed, + + /// The migration couldn't complete, either due to an error or because the sender abandoned the migration. + Aborted, +} + +impl ReceiveMigrationState { + fn finished(&self) -> bool { + matches!( + self, + ReceiveMigrationState::Completed | ReceiveMigrationState::Aborted + ) + } +} + impl Vmm { pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; @@ -898,6 +931,118 @@ impl Vmm { } } + /// Handle a migration command and advance the protocol state machine. + /// + /// **Note**: This function is responsible for consuming any payloads! It also must + /// _not_ write any response to the socket. 
+ fn vm_receive_migration_step( + &mut self, + socket: &mut SocketStream, + state: ReceiveMigrationState, + req: &Request, + receive_data_migration: &VmReceiveMigrationData, + ) -> std::result::Result { + use ReceiveMigrationState::*; + + let invalid_command = || { + Err(MigratableError::MigrateReceive(anyhow!( + "Can't handle command in current state" + ))) + }; + + let mut configure_vm = + |socket: &mut SocketStream, + memory_files: HashMap| + -> std::result::Result { + let memory_manager = self.vm_receive_config( + req, + socket, + memory_files, + receive_data_migration.tcp_serial_url.clone(), + )?; + + if let Some(ref restored_net_configs) = receive_data_migration.net_fds { + // TODO do some validation + //restored_net_config.validate(); + // Update VM's net configurations with new fds received for restore operation + + let mut vm_config = self.vm_config.as_mut().unwrap().lock().unwrap(); + + for net in restored_net_configs { + for net_config in vm_config.net.iter_mut().flatten() { + // update only if the net dev is backed by FDs + if net_config.id.as_ref() == Some(&net.id) && net_config.fds.is_some() { + log::error!( + "overwriting net fds: id={}, old={:?}, new={:?}", + net.id, + &net_config.fds, + &net.fds + ); + net_config.fds.clone_from(&net.fds); + } + } + } + } + + Ok(Configured(memory_manager)) + }; + + let recv_memory_fd = + |socket: &mut SocketStream, + mut memory_files: Vec<(u32, File)>| + -> std::result::Result { + let (slot, file) = Self::vm_receive_memory_fd(socket)?; + + memory_files.push((slot, file)); + Ok(MemoryFdsReceived(memory_files)) + }; + + if req.command() == Command::Abandon { + return Ok(Aborted); + } + + match state { + Established => match req.command() { + Command::Start => Ok(Started), + _ => invalid_command(), + }, + Started => match req.command() { + Command::MemoryFd => recv_memory_fd(socket, Vec::new()), + Command::Config => configure_vm(socket, Default::default()), + _ => invalid_command(), + }, + MemoryFdsReceived(memory_files) => match req.command() { + Command::MemoryFd => recv_memory_fd(socket, memory_files), + Command::Config => configure_vm(socket, HashMap::from_iter(memory_files)), + _ => invalid_command(), + }, + Configured(memory_manager) => match req.command() { + Command::Memory => { + self.vm_receive_memory(req, socket, &mut memory_manager.lock().unwrap())?; + Ok(Configured(memory_manager)) + } + Command::State => { + self.vm_receive_state(req, socket, memory_manager.clone())?; + Ok(StateReceived) + } + _ => invalid_command(), + }, + StateReceived => match req.command() { + Command::Complete => { + // The unwrap is safe, because the state machine makes sure we called + // vm_receive_state before, which creates the VM. 
+ let vm = self.vm.as_mut().unwrap(); + vm.resume()?; + Ok(Completed) + } + _ => invalid_command(), + }, + Completed | Aborted => { + unreachable!("Performed a step on the finished state machine") + } + } + } + fn vm_receive_config( &mut self, req: &Request, @@ -906,7 +1051,7 @@ impl Vmm { tcp_serial_url: Option, ) -> std::result::Result>, MigratableError> where - T: Read + Write, + T: Read, { // Read in config data along with memory manager data let mut data: Vec = Vec::new(); @@ -992,8 +1137,6 @@ impl Vmm { )) })?; - Response::ok().write_to(socket)?; - Ok(memory_manager) } @@ -1004,7 +1147,7 @@ impl Vmm { mm: Arc>, ) -> std::result::Result<(), MigratableError> where - T: Read + Write, + T: Read, { // Read in state data let mut data: Vec = Vec::new(); @@ -1057,13 +1200,10 @@ impl Vmm { // Create VM vm.restore().map_err(|e| { - Response::error().write_to(socket).ok(); MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e)) })?; self.vm = Some(vm); - Response::ok().write_to(socket)?; - Ok(()) } @@ -1074,18 +1214,13 @@ impl Vmm { memory_manager: &mut MemoryManager, ) -> std::result::Result<(), MigratableError> where - T: Read + ReadVolatile + Write, + T: Read + ReadVolatile, { // Read table let table = MemoryRangeTable::read_from(socket, req.length())?; // And then read the memory itself - memory_manager - .receive_memory_regions(&table, socket) - .inspect_err(|_| { - Response::error().write_to(socket).ok(); - })?; - Response::ok().write_to(socket)?; + memory_manager.receive_memory_regions(&table, socket)?; Ok(()) } @@ -2395,124 +2530,33 @@ impl RequestHandler for Vmm { // Accept the connection and get the socket let mut socket = Vmm::receive_migration_socket(&receive_data_migration.receiver_url)?; - let mut started = false; - let mut memory_manager: Option>> = None; - let mut existing_memory_files = vec![]; - loop { - let req = Request::read_from(&mut socket)?; - match req.command() { - Command::Invalid => info!("Invalid Command Received"), - Command::Start => { - info!("Start Command Received"); - started = true; - - Response::ok().write_to(&mut socket)?; - } - Command::Config => { - info!("Config Command Received"); - - if !started { - warn!("Migration not started yet"); - Response::error().write_to(&mut socket)?; - continue; - } - - let memory_manager_config = self.vm_receive_config( - &req, - &mut socket, - HashMap::from_iter(existing_memory_files.drain(..)), - receive_data_migration.tcp_serial_url.clone(), - )?; - memory_manager = Some(memory_manager_config); - - if let Some(ref restored_net_configs) = receive_data_migration.net_fds { - // TODO do some validation - //restored_net_config.validate(); - // Update VM's net configurations with new fds received for restore operation - - let mut vm_config = self.vm_config.as_mut().unwrap().lock().unwrap(); - { - for net in restored_net_configs { - for net_config in vm_config.net.iter_mut().flatten() { - // update only if the net dev is backed by FDs - if net_config.id == Some(net.id.clone()) - && net_config.fds.is_some() - { - log::error!( - "overwriting net fds: id={}, old={:?}, new={:?}", - net.id, - &net_config.fds, - &net.fds - ); - net_config.fds.clone_from(&net.fds); - } - } - } - } - }; - } - Command::State => { - info!("State Command Received"); - - if !started { - warn!("Migration not started yet"); - Response::error().write_to(&mut socket)?; - continue; - } - if let Some(mm) = memory_manager.take() { - self.vm_receive_state(&req, &mut socket, mm)?; - } else { - warn!("Configuration not sent yet"); - 
Response::error().write_to(&mut socket)?; - } - } - Command::Memory => { - info!("Memory Command Received"); + let mut state = ReceiveMigrationState::Established; - if !started { - warn!("Migration not started yet"); - Response::error().write_to(&mut socket)?; - continue; - } - if let Some(mm) = memory_manager.as_ref() { - self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?; - } else { - warn!("Configuration not sent yet"); - Response::error().write_to(&mut socket)?; - } + while !state.finished() { + let req = Request::read_from(&mut socket)?; + trace!("Command {:?} received", req.command()); + + let (response, new_state) = match self.vm_receive_migration_step( + &mut socket, + state, + &req, + &receive_data_migration, + ) { + Ok(next_state) => (Response::ok(), next_state), + Err(err) => { + warn!("Migration command {:?} failed: {}", req.command(), err); + (Response::error(), ReceiveMigrationState::Aborted) } - Command::MemoryFd => { - info!("MemoryFd Command Received"); - - if !started { - warn!("Migration not started yet"); - Response::error().write_to(&mut socket)?; - continue; - } + }; - existing_memory_files.push(Self::vm_receive_memory_fd(&mut socket)?); + state = new_state; + assert_eq!(response.length(), 0); + response.write_to(&mut socket)?; + } - Response::ok().write_to(&mut socket)?; - } - Command::Complete => { - info!("Complete Command Received"); - if let Some(ref mut vm) = self.vm.as_mut() { - vm.resume()?; - Response::ok().write_to(&mut socket)?; - } else { - warn!("VM not created yet"); - Response::error().write_to(&mut socket)?; - } - break; - } - Command::Abandon => { - info!("Abandon Command Received"); - self.vm = None; - self.vm_config = None; - Response::ok().write_to(&mut socket).ok(); - break; - } - } + if let ReceiveMigrationState::Aborted = state { + self.vm = None; + self.vm_config = None; } Ok(()) From a1da8cc90d7351b1efef1e6366a11d4a081821d5 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Fri, 7 Nov 2025 14:51:23 +0100 Subject: [PATCH 222/294] vm-migration: add helper to iterate over bitmaps Instead of using ad-hoc code, just write an extension to the Iterator trait that we can easily unit test. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vm-migration/src/bitpos_iterator.rs | 88 +++++++++++++++++++++++++++++ vm-migration/src/lib.rs | 1 + 2 files changed, 89 insertions(+) create mode 100644 vm-migration/src/bitpos_iterator.rs diff --git a/vm-migration/src/bitpos_iterator.rs b/vm-migration/src/bitpos_iterator.rs new file mode 100644 index 0000000000..8d70c7ff6b --- /dev/null +++ b/vm-migration/src/bitpos_iterator.rs @@ -0,0 +1,88 @@ +// Copyright © 2025 Cyberus Technology GmbH +// +// SPDX-License-Identifier: Apache-2.0 + +/// An iterator that turns a sequence of u64s into a sequence of bit positions +/// that are set. +/// +/// This is useful to iterate over dirty memory bitmaps. +struct BitposIterator { + underlying_it: I, + + /// How many u64's we've already consumed. + word_pos: usize, + + /// If we already started working on a u64, it's here. Together with the bit + /// position where we have to continue. + current_word: Option<(u64, u32)>, +} + +impl Iterator for BitposIterator +where + I: Iterator, +{ + type Item = u64; + + fn next(&mut self) -> Option { + loop { + if self.current_word.is_none() { + self.current_word = self.underlying_it.next().map(|w| (w, 0)); + } + + let (word, word_bit) = self.current_word?; + + // Continue early if there is no chance to find something. 
+ if word != 0 && word_bit < 64 { + let shifted_word = word >> word_bit; + if shifted_word != 0 { + let zeroes = shifted_word.trailing_zeros(); + + self.current_word = Some((word, zeroes + word_bit + 1)); + let next_bitpos = + u64::try_from(self.word_pos).unwrap() * 64 + u64::from(word_bit + zeroes); + + return Some(next_bitpos); + } + } + + self.current_word = None; + self.word_pos += 1; + } + } +} + +pub trait BitposIteratorExt: Iterator + Sized { + /// Turn an iterator over `u64` into an iterator over the bit positions of + /// all 1s. We basically treat the incoming `u64` as one gigantic integer + /// and just spit out which bits are set. + fn bit_positions(self) -> impl Iterator { + BitposIterator { + underlying_it: self, + word_pos: 0, + current_word: None, + } + } +} + +impl + Sized> BitposIteratorExt for I {} + +#[cfg(test)] +mod tests { + use super::*; + + fn bitpos_check(inp: &[u64], out: &[u64]) { + assert_eq!(inp.iter().copied().bit_positions().collect::>(), out); + } + + #[test] + fn bitpos_iterator_works() { + bitpos_check(&[], &[]); + bitpos_check(&[0], &[]); + bitpos_check(&[1], &[0]); + bitpos_check(&[5], &[0, 2]); + bitpos_check(&[3 + 32], &[0, 1, 5]); + bitpos_check(&[1 << 63], &[63]); + + bitpos_check(&[1, 1 + 32], &[0, 64, 69]); + } +} diff --git a/vm-migration/src/lib.rs b/vm-migration/src/lib.rs index 00f322636a..2e5f6e5791 100644 --- a/vm-migration/src/lib.rs +++ b/vm-migration/src/lib.rs @@ -9,6 +9,7 @@ use thiserror::Error; use crate::protocol::MemoryRangeTable; +mod bitpos_iterator; pub mod protocol; #[derive(Error, Debug)] From bcfd7b0cf96c04deeb673ad02447fa079b1e482c Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Fri, 7 Nov 2025 14:51:49 +0100 Subject: [PATCH 223/294] vm-migration: add itertools as common dependency It improves iteration code a lot! I'll use it in the upcoming commit to speed up dirty bitmap scanning. 
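For illustration only (not part of this patch), here is a minimal, self-contained sketch of the itertools pattern the next commit relies on: adjacent dirty-page indices are turned into one-page ranges and then merged with coalesce. The numbers are made up to show the merging behaviour.

    // Sketch: merge adjacent bit positions into contiguous ranges with coalesce.
    use itertools::Itertools;

    fn main() {
        let dirty_pages = [0u64, 1, 2, 5, 6];

        let ranges: Vec<std::ops::Range<u64>> = dirty_pages
            .iter()
            .copied()
            // One single-page range per dirty page.
            .map(|b| b..(b + 1))
            // Merge ranges that touch each other.
            .coalesce(|prev, curr| {
                if prev.end == curr.start {
                    Ok(prev.start..curr.end)
                } else {
                    Err((prev, curr))
                }
            })
            .collect();

        // Pages 0,1,2 collapse into 0..3 and pages 5,6 into 5..7.
        assert_eq!(ranges, vec![0..3, 5..7]);
    }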
On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- Cargo.lock | 16 ++++++++++++++++ Cargo.toml | 1 + vm-migration/Cargo.toml | 1 + 3 files changed, 18 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index a56f90cad2..1989a0e18b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -583,6 +583,12 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "endi" version = "1.1.0" @@ -1011,6 +1017,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -2387,6 +2402,7 @@ name = "vm-migration" version = "0.1.0" dependencies = [ "anyhow", + "itertools", "serde", "serde_json", "thiserror 2.0.12", diff --git a/Cargo.toml b/Cargo.toml index 1ce6d53ba3..d84f4f683d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -160,6 +160,7 @@ dirs = "6.0.0" env_logger = "0.11.8" epoll = "4.3.3" flume = "0.11.1" +itertools = "0.14.0" libc = "0.2.167" log = "0.4.22" signal-hook = "0.3.18" diff --git a/vm-migration/Cargo.toml b/vm-migration/Cargo.toml index 7a8c9337b3..69d57076f5 100644 --- a/vm-migration/Cargo.toml +++ b/vm-migration/Cargo.toml @@ -6,6 +6,7 @@ version = "0.1.0" [dependencies] anyhow = { workspace = true } +itertools = { workspace = true } serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true } thiserror = { workspace = true } From 79f1a1af7bf074c0d040dd390dd33c1570be77fb Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Fri, 7 Nov 2025 14:53:34 +0100 Subject: [PATCH 224/294] vm-migration: optimize dirty bitmap scanning With this change, we don't need a copy of the vector. Just something that can be coerced into an iterator. We also use the bit position iterator to make the code somewhat clearer. The new code is much faster, because it will not iterate over every bit, just each 1 bit in the input. The next commit will complete this optimization and have some concrete numbers. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vm-migration/src/protocol.rs | 61 +++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/vm-migration/src/protocol.rs b/vm-migration/src/protocol.rs index 26ce00bf75..2eeb155927 100644 --- a/vm-migration/src/protocol.rs +++ b/vm-migration/src/protocol.rs @@ -5,10 +5,12 @@ use std::io::{Read, Write}; +use itertools::Itertools; use serde::{Deserialize, Serialize}; use vm_memory::ByteValued; use crate::MigratableError; +use crate::bitpos_iterator::BitposIteratorExt; // Migration protocol // 1: Source establishes communication with destination (file socket or TCP connection.) @@ -215,38 +217,47 @@ pub struct MemoryRange { pub length: u64, } +impl MemoryRange { + /// Turn an iterator over the dirty bitmap into an iterator of dirty ranges. + pub fn dirty_ranges( + bitmap: impl IntoIterator, + start_addr: u64, + page_size: u64, + ) -> impl Iterator { + bitmap + .into_iter() + .bit_positions() + // Turn them into single-element ranges for coalesce. 
+ .map(|b| b..(b + 1)) + // Merge adjacent ranges. + .coalesce(|prev, curr| { + if prev.end == curr.start { + Ok(prev.start..curr.end) + } else { + Err((prev, curr)) + } + }) + .map(move |r| Self { + gpa: start_addr + r.start * page_size, + length: (r.end - r.start) * page_size, + }) + } +} + #[derive(Clone, Default, Serialize, Deserialize)] pub struct MemoryRangeTable { data: Vec, } impl MemoryRangeTable { - pub fn from_bitmap(bitmap: Vec, start_addr: u64, page_size: u64) -> Self { - let mut table = MemoryRangeTable::default(); - let mut entry: Option = None; - for (i, block) in bitmap.iter().enumerate() { - for j in 0..64 { - let is_page_dirty = ((block >> j) & 1u64) != 0u64; - let page_offset = ((i * 64) + j) as u64 * page_size; - if is_page_dirty { - if let Some(entry) = &mut entry { - entry.length += page_size; - } else { - entry = Some(MemoryRange { - gpa: start_addr + page_offset, - length: page_size, - }); - } - } else if let Some(entry) = entry.take() { - table.push(entry); - } - } - } - if let Some(entry) = entry.take() { - table.push(entry); + pub fn from_bitmap( + bitmap: impl IntoIterator, + start_addr: u64, + page_size: u64, + ) -> Self { + Self { + data: MemoryRange::dirty_ranges(bitmap, start_addr, page_size).collect(), } - - table } pub fn regions(&self) -> &[MemoryRange] { From 3b46a49eccfebaf8527cbd3389a51f06c9402589 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Fri, 7 Nov 2025 15:04:11 +0100 Subject: [PATCH 225/294] virtio-devices: mark a possible improvement This would be a good opportunity to optimize another pointless vector away, but I don't have a good way to test this at the moment. But maybe someone else gives it a shot. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- virtio-devices/src/mem.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/virtio-devices/src/mem.rs b/virtio-devices/src/mem.rs index 7893be6b1a..0be5b24f62 100644 --- a/virtio-devices/src/mem.rs +++ b/virtio-devices/src/mem.rs @@ -392,6 +392,8 @@ impl BlocksState { } } + // TODO We can avoid creating a new bitmap here, if we switch the code + // to use Vec to keep dirty bits and just pass it as is. MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE) } } From a1f9c9853a6940cbfb7bde44a8998201785d8b79 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Fri, 7 Nov 2025 15:05:24 +0100 Subject: [PATCH 226/294] virtio-devices: avoid creating a temporary vector ... by passing the slice along instead. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- virtio-devices/src/vhost_user/vu_common_ctrl.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/virtio-devices/src/vhost_user/vu_common_ctrl.rs b/virtio-devices/src/vhost_user/vu_common_ctrl.rs index 05034d0ec5..2e55782934 100644 --- a/virtio-devices/src/vhost_user/vu_common_ctrl.rs +++ b/virtio-devices/src/vhost_user/vu_common_ctrl.rs @@ -573,12 +573,16 @@ impl VhostUserHandle { // divide it by 8. 
let len = region.size() / 8; // SAFETY: region is of size len - let bitmap = unsafe { + let bitmap: &[u64] = unsafe { // Cast the pointer to u64 let ptr = region.as_ptr() as *const u64; - std::slice::from_raw_parts(ptr, len).to_vec() + std::slice::from_raw_parts(ptr, len) }; - Ok(MemoryRangeTable::from_bitmap(bitmap, 0, 4096)) + Ok(MemoryRangeTable::from_bitmap( + bitmap.iter().copied(), + 0, + 4096, + )) } else { Err(Error::MissingShmLogRegion) } From 484d4f30ca1018b17e8e505e2073d915d20bdbae Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Fri, 7 Nov 2025 15:06:05 +0100 Subject: [PATCH 227/294] vmm: avoid creating large temporary vector during migration ... by just passing the iterator along. For large VMs this bitmap is gigantic. A 12TB VM has 384MB of dirty bitmap. With all these optimizations from the previous commits in place, we see quite the improvement when it comes to scanning the dirty bitmap. For a bitmap with 1% bits (randomly) set, dirty_log() takes: Original code: 2166ms (100.0%) New code: 382ms ( 17.6%) on my system. The sparser the dirty bitmap the faster. Scanning an empty bitmap is 100x faster. For a 5% populated bitmap we are still 3x faster. If someone wants to play with this, there is a benchmark harness here: https://github.com/blitz/chv-bitmap-bench On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/memory_manager.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index c1a7b4159b..a2dafc59e9 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -2615,11 +2615,10 @@ impl Migratable for MemoryManager { } }; - let dirty_bitmap: Vec = vm_dirty_bitmap + let dirty_bitmap = vm_dirty_bitmap .iter() .zip(vmm_dirty_bitmap.iter()) - .map(|(x, y)| x | y) - .collect(); + .map(|(x, y)| x | y); let sub_table = MemoryRangeTable::from_bitmap(dirty_bitmap, r.gpa, 4096); From 4e5536d1c2c397d0edbd7ac7eebdf5ee252afc55 Mon Sep 17 00:00:00 2001 From: Thomas Prescher Date: Mon, 10 Nov 2025 14:22:48 +0100 Subject: [PATCH 228/294] block: virtio-devices: make disk_nsectors a shared atomic This change is a prerequisite for live disk resizing. Before this commit, the epoll-handler threads just got a copy of the sector size which we cannot update during runtime. 
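As an illustration only (not part of this patch), the pattern boils down to sharing an Arc<AtomicU64> between the device and its epoll-handler thread, so a later resize is observed without restarting the handler. The names below are made up and are not the real block device types.

    // Sketch: a handler thread sees updates to a shared sector count.
    use std::sync::Arc;
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::thread;

    fn main() {
        let disk_nsectors = Arc::new(AtomicU64::new(1024));
        let handler_view = disk_nsectors.clone();

        let handler = thread::spawn(move || {
            // The handler loads the current value instead of keeping a stale copy.
            while handler_view.load(Ordering::SeqCst) == 1024 {
                thread::yield_now();
            }
            handler_view.load(Ordering::SeqCst)
        });

        // The control path "resizes" the disk at runtime.
        disk_nsectors.store(2048, Ordering::SeqCst);

        assert_eq!(handler.join().unwrap(), 2048);
    }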
On-behalf-of: SAP thomas.prescher@sap.com Signed-off-by: Thomas Prescher --- virtio-devices/src/block.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/virtio-devices/src/block.rs b/virtio-devices/src/block.rs index fa16bf86c2..37ef3fe419 100644 --- a/virtio-devices/src/block.rs +++ b/virtio-devices/src/block.rs @@ -135,7 +135,7 @@ struct BlockEpollHandler { queue: Queue, mem: GuestMemoryAtomic, disk_image: Box, - disk_nsectors: u64, + disk_nsectors: Arc, interrupt_cb: Arc, serial: Vec, kill_evt: EventFd, @@ -230,7 +230,7 @@ impl BlockEpollHandler { let result = request.execute_async( desc_chain.memory(), - self.disk_nsectors, + self.disk_nsectors.load(Ordering::SeqCst), self.disk_image.as_mut(), &self.serial, desc_chain.head_index() as u64, @@ -631,7 +631,7 @@ pub struct Block { id: String, disk_image: Box, disk_path: PathBuf, - disk_nsectors: u64, + disk_nsectors: Arc, config: VirtioBlockConfig, writeback: Arc, counters: BlockCounters, @@ -762,7 +762,7 @@ impl Block { id, disk_image, disk_path, - disk_nsectors, + disk_nsectors: Arc::new(AtomicU64::new(disk_nsectors)), config, writeback: Arc::new(AtomicBool::new(true)), counters: BlockCounters::default(), @@ -852,7 +852,7 @@ impl Block { fn state(&self) -> BlockState { BlockState { disk_path: self.disk_path.to_str().unwrap().to_owned(), - disk_nsectors: self.disk_nsectors, + disk_nsectors: self.disk_nsectors.load(Ordering::SeqCst), avail_features: self.common.avail_features, acked_features: self.common.acked_features, config: self.config, @@ -966,7 +966,7 @@ impl VirtioDevice for Block { error!("failed to create new AsyncIo: {}", e); ActivateError::BadActivate })?, - disk_nsectors: self.disk_nsectors, + disk_nsectors: self.disk_nsectors.clone(), interrupt_cb: interrupt_cb.clone(), serial: self.serial.clone(), kill_evt, From 3cf78e976928209e9e52a41d64576990e688d274 Mon Sep 17 00:00:00 2001 From: Thomas Prescher Date: Tue, 11 Nov 2025 09:40:16 +0100 Subject: [PATCH 229/294] vmm: disk resize infrastructure Add basic infrastructure so resize events are propagated to the underlying disk implementation. On-behalf-of: SAP thomas.prescher@sap.com Signed-off-by: Thomas Prescher --- block/src/async_io.rs | 10 ++++++++++ block/src/fixed_vhd_async.rs | 4 ++++ block/src/fixed_vhd_sync.rs | 4 ++++ block/src/qcow_sync.rs | 3 +++ block/src/raw_async.rs | 4 ++++ block/src/raw_async_aio.rs | 4 ++++ block/src/raw_sync.rs | 4 ++++ block/src/vhdx_sync.rs | 4 ++++ virtio-devices/src/block.rs | 38 +++++++++++++++++++++++++++++++++++- vmm/src/api/mod.rs | 2 ++ vmm/src/device_manager.rs | 16 +++++++++++++++ vmm/src/lib.rs | 9 +++++++++ vmm/src/vm.rs | 13 ++++++++++++ 13 files changed, 114 insertions(+), 1 deletion(-) diff --git a/block/src/async_io.rs b/block/src/async_io.rs index aa31c54367..7d44a8c361 100644 --- a/block/src/async_io.rs +++ b/block/src/async_io.rs @@ -18,6 +18,14 @@ pub enum DiskFileError { /// Failed creating a new AsyncIo. #[error("Failed creating a new AsyncIo")] NewAsyncIo(#[source] std::io::Error), + + /// Unsupported operation. + #[error("Unsupported operation")] + Unsupported, + + /// Resize failed + #[error("Resize failed")] + ResizeError, } pub type DiskFileResult = std::result::Result; @@ -61,6 +69,8 @@ pub trait DiskFile: Send { fn topology(&mut self) -> DiskTopology { DiskTopology::default() } + fn resize(&mut self, size: u64) -> DiskFileResult<()>; + /// Returns the file descriptor of the underlying disk image file. 
/// /// The file descriptor is supposed to be used for `fcntl()` calls but no diff --git a/block/src/fixed_vhd_async.rs b/block/src/fixed_vhd_async.rs index ac02e21bf3..07ad258c4a 100644 --- a/block/src/fixed_vhd_async.rs +++ b/block/src/fixed_vhd_async.rs @@ -34,6 +34,10 @@ impl DiskFile for FixedVhdDiskAsync { ) as Box) } + fn resize(&mut self, _size: u64) -> DiskFileResult<()> { + Err(DiskFileError::Unsupported) + } + fn fd(&mut self) -> BorrowedDiskFd<'_> { BorrowedDiskFd::new(self.0.as_raw_fd()) } diff --git a/block/src/fixed_vhd_sync.rs b/block/src/fixed_vhd_sync.rs index c125710698..0f05c66ad7 100644 --- a/block/src/fixed_vhd_sync.rs +++ b/block/src/fixed_vhd_sync.rs @@ -34,6 +34,10 @@ impl DiskFile for FixedVhdDiskSync { ) as Box) } + fn resize(&mut self, _size: u64) -> DiskFileResult<()> { + Err(DiskFileError::Unsupported) + } + fn fd(&mut self) -> BorrowedDiskFd<'_> { BorrowedDiskFd::new(self.0.as_raw_fd()) } diff --git a/block/src/qcow_sync.rs b/block/src/qcow_sync.rs index cd6a1fb774..36b82e81cc 100644 --- a/block/src/qcow_sync.rs +++ b/block/src/qcow_sync.rs @@ -41,6 +41,9 @@ impl DiskFile for QcowDiskSync { fn fd(&mut self) -> BorrowedDiskFd<'_> { BorrowedDiskFd::new(self.qcow_file.as_raw_fd()) } + fn resize(&mut self, _size: u64) -> DiskFileResult<()> { + Err(DiskFileError::Unsupported) + } } pub struct QcowSync { diff --git a/block/src/raw_async.rs b/block/src/raw_async.rs index 1a582073b0..5da9515f2d 100644 --- a/block/src/raw_async.rs +++ b/block/src/raw_async.rs @@ -47,6 +47,10 @@ impl DiskFile for RawFileDisk { } } + fn resize(&mut self, _size: u64) -> DiskFileResult<()> { + Err(DiskFileError::Unsupported) + } + fn fd(&mut self) -> BorrowedDiskFd<'_> { BorrowedDiskFd::new(self.file.as_raw_fd()) } diff --git a/block/src/raw_async_aio.rs b/block/src/raw_async_aio.rs index 9a74fa41d7..7404e81c81 100644 --- a/block/src/raw_async_aio.rs +++ b/block/src/raw_async_aio.rs @@ -50,6 +50,10 @@ impl DiskFile for RawFileDiskAio { } } + fn resize(&mut self, _size: u64) -> DiskFileResult<()> { + Err(DiskFileError::Unsupported) + } + fn fd(&mut self) -> BorrowedDiskFd<'_> { BorrowedDiskFd::new(self.file.as_raw_fd()) } diff --git a/block/src/raw_sync.rs b/block/src/raw_sync.rs index 6b98147e19..43a9a5b3f0 100644 --- a/block/src/raw_sync.rs +++ b/block/src/raw_sync.rs @@ -47,6 +47,10 @@ impl DiskFile for RawFileDiskSync { fn fd(&mut self) -> BorrowedDiskFd<'_> { BorrowedDiskFd::new(self.file.as_raw_fd()) } + + fn resize(&mut self, _size: u64) -> DiskFileResult<()> { + Err(DiskFileError::Unsupported) + } } pub struct RawFileSync { diff --git a/block/src/vhdx_sync.rs b/block/src/vhdx_sync.rs index 01bcbf5e7f..0028672d36 100644 --- a/block/src/vhdx_sync.rs +++ b/block/src/vhdx_sync.rs @@ -38,6 +38,10 @@ impl DiskFile for VhdxDiskSync { ) } + fn resize(&mut self, _size: u64) -> DiskFileResult<()> { + Err(DiskFileError::Unsupported) + } + fn fd(&mut self) -> BorrowedDiskFd<'_> { BorrowedDiskFd::new(self.vhdx_file.as_raw_fd()) } diff --git a/virtio-devices/src/block.rs b/virtio-devices/src/block.rs index 37ef3fe419..4b3ff81775 100644 --- a/virtio-devices/src/block.rs +++ b/virtio-devices/src/block.rs @@ -18,7 +18,7 @@ use std::sync::{Arc, Barrier}; use std::{io, result}; use anyhow::anyhow; -use block::async_io::{AsyncIo, AsyncIoError, DiskFile}; +use block::async_io::{AsyncIo, AsyncIoError, DiskFile, DiskFileError}; use block::fcntl::{LockError, LockGranularity, LockType, get_lock_state}; use block::{ ExecuteAsync, ExecuteError, Request, RequestType, VirtioBlockConfig, build_serial, fcntl, @@ 
-93,6 +93,14 @@ pub enum Error { /// The path of the disk image. path: PathBuf, }, + #[error("disk image size is not a multiple of {}", SECTOR_SIZE)] + InvalidSize, + #[error("Failed to pause/resume vcpus")] + FailedPauseResume(#[source] MigratableError), + #[error("Failed signal config interrupt")] + FailedSignalingConfigChange(#[source] io::Error), + #[error("Disk resize failed")] + FailedDiskResize(#[source] DiskFileError), } pub type Result = result::Result; @@ -879,6 +887,34 @@ impl Block { self.writeback.store(writeback, Ordering::Release); } + pub fn resize(&mut self, new_size: u64) -> Result<()> { + if !new_size.is_multiple_of(SECTOR_SIZE) { + return Err(Error::InvalidSize); + } + + self.disk_image + .resize(new_size) + .map_err(Error::FailedDiskResize)?; + + let nsectors = new_size / SECTOR_SIZE; + + self.common.pause().map_err(Error::FailedPauseResume)?; + + self.disk_nsectors.store(nsectors, Ordering::SeqCst); + self.config.capacity = nsectors; + self.state().disk_nsectors = nsectors; + + self.common.resume().map_err(Error::FailedPauseResume)?; + + if let Some(interrupt_cb) = self.common.interrupt_cb.as_ref() { + interrupt_cb + .trigger(VirtioInterruptType::Config) + .map_err(Error::FailedSignalingConfigChange) + } else { + Ok(()) + } + } + #[cfg(fuzzing)] pub fn wait_for_epoll_threads(&mut self) { self.common.wait_for_epoll_threads(); diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index 3c45f57d2e..3c9a2a2cc4 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -330,6 +330,8 @@ pub trait RequestHandler { fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> Result<(), VmError>; + fn vm_resize_disk(&mut self, id: String, desired_size: u64) -> Result<(), VmError>; + fn vm_add_device(&mut self, device_cfg: DeviceConfig) -> Result>, VmError>; fn vm_add_user_device( diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 3f09c6f828..fa409a27e8 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -667,6 +667,10 @@ pub enum DeviceManagerError { /// Error adding fw_cfg to bus. #[error("Error adding fw_cfg to bus")] ErrorAddingFwCfgToBus(#[source] vm_device::BusError), + + /// Disk resizing failed. 
+ #[error("Disk resize error")] + DiskResizeError(#[source] virtio_devices::block::Error), } pub type DeviceManagerResult = result::Result; @@ -4915,6 +4919,18 @@ impl DeviceManager { 0 } + pub fn resize_disk(&mut self, device_id: &str, new_size: u64) -> DeviceManagerResult<()> { + for dev in &self.block_devices { + let mut disk = dev.lock().unwrap(); + if disk.id() == device_id { + return disk + .resize(new_size) + .map_err(DeviceManagerError::DiskResizeError); + } + } + Err(DeviceManagerError::UnknownDeviceId(device_id.to_string())) + } + pub fn device_tree(&self) -> Arc> { self.device_tree.clone() } diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index ed4c8b36f3..d1e4b57abb 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -2221,6 +2221,15 @@ impl RequestHandler for Vmm { } } + fn vm_resize_disk(&mut self, id: String, desired_size: u64) -> result::Result<(), VmError> { + self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; + + if let Some(ref mut vm) = self.vm { + return vm.resize_disk(id, desired_size); + } + + Err(VmError::ResizeDisk) + } fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> { self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index e824a729c5..6007b8add0 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -245,6 +245,9 @@ pub enum Error { #[error("Failed resizing a memory zone")] ResizeZone, + #[error("Failed resizing a disk image")] + ResizeDisk, + #[error("Cannot activate virtio devices")] ActivateVirtioDevices(#[source] DeviceManagerError), @@ -1761,6 +1764,16 @@ impl Vm { Ok(()) } + pub fn resize_disk(&mut self, id: String, desired_size: u64) -> Result<()> { + self.device_manager + .lock() + .unwrap() + .resize_disk(&id, desired_size) + .map_err(Error::DeviceManager)?; + + Ok(()) + } + pub fn resize_zone(&mut self, id: String, desired_memory: u64) -> Result<()> { let memory_config = &mut self.config.lock().unwrap().memory; From 402f7fba8310ec844953b355457ac7d0d4ab489b Mon Sep 17 00:00:00 2001 From: Thomas Prescher Date: Tue, 11 Nov 2025 09:41:48 +0100 Subject: [PATCH 230/294] block: raw_async: implement disk resizing Support for resize events for raw_async disks. On-behalf-of: SAP thomas.prescher@sap.com Signed-off-by: Thomas Prescher --- block/src/raw_async.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/block/src/raw_async.rs b/block/src/raw_async.rs index 5da9515f2d..a982623a59 100644 --- a/block/src/raw_async.rs +++ b/block/src/raw_async.rs @@ -47,8 +47,17 @@ impl DiskFile for RawFileDisk { } } - fn resize(&mut self, _size: u64) -> DiskFileResult<()> { - Err(DiskFileError::Unsupported) + fn resize(&mut self, size: u64) -> DiskFileResult<()> { + let borrowed_fd = self.fd(); + let raw_fd = borrowed_fd.as_raw_fd(); + + // SAFETY: FFI call into libc, trivially safe + let rc = unsafe { libc::ftruncate(raw_fd, size as libc::off_t) }; + if rc == 0 { + Ok(()) + } else { + Err(DiskFileError::ResizeError) + } } fn fd(&mut self) -> BorrowedDiskFd<'_> { From 3cb1cc635f401a4a84e11345d73f917a04d8e025 Mon Sep 17 00:00:00 2001 From: Thomas Prescher Date: Mon, 10 Nov 2025 12:49:17 +0100 Subject: [PATCH 231/294] ch-remote: support live disk resizing Support disk resizing via ch-remote and REST api. 
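For illustration (not part of this patch), the request body sent by `ch-remote resize-disk` and accepted by the new `vm.resize-disk` endpoint mirrors the VmResizeDiskData type added here. The disk id below is a hypothetical example, and the struct is redeclared locally just to keep the sketch self-contained.

    // Sketch: building the JSON body for a disk resize request.
    use serde::Serialize;

    #[derive(Serialize)]
    struct VmResizeDiskData {
        id: String,
        desired_size: u64,
    }

    fn main() {
        let body = VmResizeDiskData {
            id: "disk0".to_string(),                // hypothetical disk id
            desired_size: 32 * 1024 * 1024 * 1024,  // 32 GiB, sector aligned
        };

        // Prints: {"id":"disk0","desired_size":34359738368}
        println!("{}", serde_json::to_string(&body).unwrap());
    }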
On-behalf-of: SAP thomas.prescher@sap.com Signed-off-by: Thomas Prescher --- src/bin/ch-remote.rs | 42 +++++++++++++++++++++++++++ vmm/src/api/http/http_endpoint.rs | 3 +- vmm/src/api/http/mod.rs | 6 +++- vmm/src/api/mod.rs | 48 +++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 2 deletions(-) diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs index 2614855c45..35d566bd80 100644 --- a/src/bin/ch-remote.rs +++ b/src/bin/ch-remote.rs @@ -320,6 +320,22 @@ fn rest_api_do_command(matches: &ArgMatches, socket: &mut UnixStream) -> ApiResu )?; simple_api_command(socket, "PUT", "resize", Some(&resize)).map_err(Error::HttpApiClient) } + Some("resize-disk") => { + let resize_disk = resize_disk_config( + matches + .subcommand_matches("resize-disk") + .unwrap() + .get_one::("disk") + .unwrap(), + matches + .subcommand_matches("resize-disk") + .unwrap() + .get_one::("size") + .unwrap(), + )?; + simple_api_command(socket, "PUT", "resize-disk", Some(&resize_disk)) + .map_err(Error::HttpApiClient) + } Some("resize-zone") => { let resize_zone = resize_zone_config( matches @@ -782,6 +798,18 @@ fn resize_config( Ok(serde_json::to_string(&resize).unwrap()) } +fn resize_disk_config(id: &str, size: &str) -> Result { + let resize_zone = vmm::api::VmResizeDiskData { + id: id.to_owned(), + desired_size: size + .parse::() + .map_err(Error::InvalidMemorySize)? + .0, + }; + + Ok(serde_json::to_string(&resize_zone).unwrap()) +} + fn resize_zone_config(id: &str, size: &str) -> Result { let resize_zone = vmm::api::VmResizeZoneData { id: id.to_owned(), @@ -1050,6 +1078,20 @@ fn get_cli_commands_sorted() -> Box<[Command]> { .help("New memory size in bytes (supports K/M/G suffix)") .num_args(1), ), + Command::new("resize-disk") + .about("grows/shrinks an attached disk") + .arg( + Arg::new("disk") + .long("disk") + .help("disk identifier") + .num_args(1), + ) + .arg( + Arg::new("size") + .long("size") + .help("new disk size") + .num_args(1), + ), Command::new("resize-zone") .about("Resize a memory zone") .arg( diff --git a/vmm/src/api/http/http_endpoint.rs b/vmm/src/api/http/http_endpoint.rs index 74d7e2f608..0daf7f7af5 100644 --- a/vmm/src/api/http/http_endpoint.rs +++ b/vmm/src/api/http/http_endpoint.rs @@ -44,7 +44,7 @@ use crate::api::{ AddDisk, ApiAction, ApiError, ApiRequest, NetConfig, VmAddDevice, VmAddFs, VmAddNet, VmAddPmem, VmAddUserDevice, VmAddVdpa, VmAddVsock, VmBoot, VmConfig, VmCounters, VmDelete, VmNmi, VmPause, VmPowerButton, VmReboot, VmReceiveMigration, VmReceiveMigrationData, VmRemoveDevice, VmResize, - VmResizeZone, VmRestore, VmResume, VmSendMigration, VmShutdown, VmSnapshot, + VmResizeDisk, VmResizeZone, VmRestore, VmResume, VmSendMigration, VmShutdown, VmSnapshot, }; use crate::config::{RestoreConfig, RestoredNetConfig}; use crate::cpu::Error as CpuError; @@ -227,6 +227,7 @@ vm_action_put_handler_body!(VmAddVdpa); vm_action_put_handler_body!(VmAddVsock); vm_action_put_handler_body!(VmAddUserDevice); vm_action_put_handler_body!(VmRemoveDevice); +vm_action_put_handler_body!(VmResizeDisk); vm_action_put_handler_body!(VmResizeZone); vm_action_put_handler_body!(VmSnapshot); vm_action_put_handler_body!(VmSendMigration); diff --git a/vmm/src/api/http/mod.rs b/vmm/src/api/http/mod.rs index 243429154d..4dfbf7b9b0 100644 --- a/vmm/src/api/http/mod.rs +++ b/vmm/src/api/http/mod.rs @@ -29,7 +29,7 @@ use crate::api::VmCoredump; use crate::api::{ AddDisk, ApiError, ApiRequest, VmAddDevice, VmAddFs, VmAddNet, VmAddPmem, VmAddUserDevice, VmAddVdpa, VmAddVsock, VmBoot, VmCounters, VmDelete, 
VmNmi, VmPause, VmPowerButton, VmReboot, - VmReceiveMigration, VmRemoveDevice, VmResize, VmResizeZone, VmRestore, VmResume, + VmReceiveMigration, VmRemoveDevice, VmResize, VmResizeDisk, VmResizeZone, VmRestore, VmResume, VmSendMigration, VmShutdown, VmSnapshot, }; use crate::landlock::Landlock; @@ -249,6 +249,10 @@ pub static HTTP_ROUTES: LazyLock = LazyLock::new(|| { endpoint!("/vm.resize"), Box::new(VmActionHandler::new(&VmResize)), ); + r.routes.insert( + endpoint!("/vm.resize-disk"), + Box::new(VmActionHandler::new(&VmResizeDisk)), + ); r.routes.insert( endpoint!("/vm.resize-zone"), Box::new(VmActionHandler::new(&VmResizeZone)), diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index 3c9a2a2cc4..84245fd5c0 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -133,6 +133,10 @@ pub enum ApiError { #[error("The VM could not be resized")] VmResize(#[source] VmError), + /// The disk could not be resized. + #[error("The disk could not be resized")] + VmResizeDisk(#[source] VmError), + /// The memory zone could not be resized. #[error("The memory zone could not be resized")] VmResizeZone(#[source] VmError), @@ -222,6 +226,12 @@ pub struct VmResizeData { pub desired_balloon: Option, } +#[derive(Clone, Deserialize, Serialize, Default, Debug)] +pub struct VmResizeDiskData { + pub id: String, + pub desired_size: u64, +} + #[derive(Clone, Deserialize, Serialize, Default, Debug)] pub struct VmResizeZoneData { pub id: String, @@ -1153,6 +1163,44 @@ impl ApiAction for VmResize { } } +pub struct VmResizeDisk; + +impl ApiAction for VmResizeDisk { + type RequestBody = VmResizeDiskData; + type ResponseBody = Option; + + fn request( + &self, + resize_disk_data: Self::RequestBody, + response_sender: Sender, + ) -> ApiRequest { + Box::new(move |vmm| { + info!("API request event: VmResizeDisk {:?}", resize_disk_data); + println!("xxxxxx"); + + let response = vmm + .vm_resize_disk(resize_disk_data.id, resize_disk_data.desired_size) + .map_err(ApiError::VmResizeDisk) + .map(|_| ApiResponsePayload::Empty); + + response_sender + .send(response) + .map_err(VmmError::ApiResponseSend)?; + + Ok(false) + }) + } + + fn send( + &self, + api_evt: EventFd, + api_sender: Sender, + data: Self::RequestBody, + ) -> ApiResult { + get_response_body(self, api_evt, api_sender, data) + } +} + pub struct VmResizeZone; impl ApiAction for VmResizeZone { From 4804a8579521491613a13da21810e14ba91b1a10 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Wed, 12 Nov 2025 10:19:21 +0100 Subject: [PATCH 232/294] vmm: switch log level (not an error) On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index d1e4b57abb..3f425b8149 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -972,7 +972,7 @@ impl Vmm { for net_config in vm_config.net.iter_mut().flatten() { // update only if the net dev is backed by FDs if net_config.id.as_ref() == Some(&net.id) && net_config.fds.is_some() { - log::error!( + log::debug!( "overwriting net fds: id={}, old={:?}, new={:?}", net.id, &net_config.fds, From baad3441ceee2df742e03b6862a69d9e51b9809c Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Sun, 2 Nov 2025 11:49:45 +0100 Subject: [PATCH 233/294] vmm: seccomp: allow http-server to use sendto Fixes: https://github.com/cloud-hypervisor/cloud-hypervisor/issues/7449 Signed-off-by: Alyssa Ross --- vmm/src/seccomp_filters.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vmm/src/seccomp_filters.rs 
b/vmm/src/seccomp_filters.rs index 2d7d3ca11e..948d978dda 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -860,6 +860,7 @@ fn http_api_thread_rules() -> Result)>, BackendError> (libc::SYS_recvfrom, vec![]), (libc::SYS_recvmsg, vec![]), (libc::SYS_sched_yield, vec![]), + (libc::SYS_sendto, vec![]), (libc::SYS_sigaltstack, vec![]), (libc::SYS_write, vec![]), (libc::SYS_rt_sigprocmask, vec![]), From 0e20355b44ed379f5d0783aff805d91ec8b53686 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Wed, 15 Oct 2025 13:38:18 +0200 Subject: [PATCH 234/294] vm-migration: add connections field to HTTP API This is not wired up to anywhere yet. We will use this to establish multiple connections for live migration. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- src/bin/ch-remote.rs | 23 ++++++++++++++++++++++- vmm/src/api/mod.rs | 19 ++++++++++++++++--- vmm/src/api/openapi/cloud-hypervisor.yaml | 4 ++++ 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs index 35d566bd80..92c63900f2 100644 --- a/src/bin/ch-remote.rs +++ b/src/bin/ch-remote.rs @@ -9,6 +9,7 @@ mod test_util; use std::io::Read; use std::marker::PhantomData; +use std::num::NonZeroU32; use std::os::unix::net::UnixStream; use std::process; @@ -505,6 +506,11 @@ fn rest_api_do_command(matches: &ArgMatches, socket: &mut UnixStream) -> ApiResu .unwrap() .get_one::("migration-timeout-s") .unwrap_or(&3600), + *matches + .subcommand_matches("send-migration") + .unwrap() + .get_one::("connections") + .unwrap_or(&NonZeroU32::new(1).unwrap()), ); simple_api_command(socket, "PUT", "send-migration", Some(&send_migration_data)) .map_err(Error::HttpApiClient) @@ -935,12 +941,19 @@ fn receive_migration_data(url: &str) -> String { serde_json::to_string(&receive_migration_data).unwrap() } -fn send_migration_data(url: &str, local: bool, downtime: u64, migration_timeout: u64) -> String { +fn send_migration_data( + url: &str, + local: bool, + downtime: u64, + migration_timeout: u64, + connections: NonZeroU32, +) -> String { let send_migration_data = vmm::api::VmSendMigrationData { destination_url: url.to_owned(), local, downtime, migration_timeout, + connections, }; serde_json::to_string(&send_migration_data).unwrap() @@ -1116,6 +1129,14 @@ fn get_cli_commands_sorted() -> Box<[Command]> { Command::new("resume").about("Resume the VM"), Command::new("send-migration") .about("Initiate a VM migration") + .arg( + Arg::new("connections") + .long("connections") + .help("The number of connections to use for the migration") + .num_args(1) + .value_parser(clap::value_parser!(u32)) + .default_value("1"), + ) .arg( Arg::new("downtime-ms") .long("downtime-ms") diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index 84245fd5c0..0b0be98b86 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -34,6 +34,7 @@ pub mod dbus; pub mod http; use std::io; +use std::num::NonZeroU32; use std::sync::mpsc::{RecvError, SendError, Sender, channel}; use micro_http::Body; @@ -255,7 +256,7 @@ pub struct VmCoredumpData { pub destination_url: String, } -#[derive(Clone, Deserialize, Serialize, Default, Debug)] +#[derive(Clone, Deserialize, Serialize, Debug)] pub struct VmReceiveMigrationData { /// URL for the reception of migration state pub receiver_url: String, @@ -266,9 +267,13 @@ pub struct VmReceiveMigrationData { pub net_fds: Option>, } -#[derive(Clone, Deserialize, Serialize, Default, Debug)] +#[derive(Clone, Deserialize, Serialize, Debug)] pub struct 
VmSendMigrationData { - /// URL to migrate the VM to + /// URL to migrate the VM to. + /// + /// This is not actually a URL, but we are stuck with the name, because it's + /// part of the HTTP API. The destination is a string, such as + /// tcp:: or unix:/path/to/socket. pub destination_url: String, /// Send memory across socket without copying #[serde(default)] @@ -279,6 +284,9 @@ pub struct VmSendMigrationData { /// Second level migration timeout #[serde(default)] pub migration_timeout: u64, + /// The number of parallel connections for migration + #[serde(default = "default_connections")] + pub connections: NonZeroU32, } // Default value for downtime the same as qemu. @@ -286,6 +294,11 @@ fn default_downtime() -> u64 { 300 } +// We use a single connection for backward compatibility as default. +fn default_connections() -> NonZeroU32 { + NonZeroU32::new(1).unwrap() +} + pub enum ApiResponsePayload { /// No data is sent on the channel. Empty, diff --git a/vmm/src/api/openapi/cloud-hypervisor.yaml b/vmm/src/api/openapi/cloud-hypervisor.yaml index 919555b2c4..80d5812bd6 100644 --- a/vmm/src/api/openapi/cloud-hypervisor.yaml +++ b/vmm/src/api/openapi/cloud-hypervisor.yaml @@ -1245,6 +1245,10 @@ components: - destination_url type: object properties: + connections: + type: integer + format: int64 + default: 1 destination_url: type: string local: From 78cb22d6483441fb307cd15763d9a4f62a05f310 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Thu, 16 Oct 2025 16:21:55 +0200 Subject: [PATCH 235/294] vmm: inline functions in vm_receive_memory This has no functional change, but it is a requirement to remove the lock that used to obtain the MemoryManager instance. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 38 +++++++++++++++++++++++++++++---- vmm/src/memory_manager.rs | 44 +-------------------------------------- 2 files changed, 35 insertions(+), 47 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 3f425b8149..6cdbf66bbf 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -51,7 +51,10 @@ use signal_hook::iterator::{Handle, Signals}; use thiserror::Error; use tracer::trace_scoped; use vm_memory::bitmap::{AtomicBitmap, BitmapSlice}; -use vm_memory::{ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile}; +use vm_memory::{ + GuestAddress, GuestAddressSpace, GuestMemory, ReadVolatile, VolatileMemoryError, VolatileSlice, + WriteVolatile, +}; use vm_migration::protocol::*; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; use vmm_sys_util::eventfd::EventFd; @@ -1217,10 +1220,37 @@ impl Vmm { T: Read + ReadVolatile, { // Read table - let table = MemoryRangeTable::read_from(socket, req.length())?; + let ranges = MemoryRangeTable::read_from(socket, req.length())?; + let mem = memory_manager.guest_memory().memory(); + + for range in ranges.regions() { + let mut offset: u64 = 0; + // Here we are manually handling the retry in case we can't the + // whole region at once because we can't use the implementation + // from vm-memory::GuestMemory of read_exact_from() as it is not + // following the correct behavior. 
For more info about this issue + // see: https://github.com/rust-vmm/vm-memory/issues/174 + loop { + let bytes_read = mem + .read_volatile_from( + GuestAddress(range.gpa + offset), + socket, + (range.length - offset) as usize, + ) + .map_err(|e| { + MigratableError::MigrateReceive(anyhow!( + "Error receiving memory from socket: {}", + e + )) + })?; + offset += bytes_read as u64; + + if offset == range.length { + break; + } + } + } - // And then read the memory itself - memory_manager.receive_memory_regions(&table, socket)?; Ok(()) } diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index a2dafc59e9..fbf3e2f5f9 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -38,7 +38,7 @@ use vm_memory::guest_memory::FileOffset; use vm_memory::mmap::MmapRegionError; use vm_memory::{ Address, Error as MmapError, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic, - GuestMemoryError, GuestMemoryRegion, GuestUsize, MmapRegion, ReadVolatile, + GuestMemoryError, GuestMemoryRegion, GuestUsize, MmapRegion, }; use vm_migration::protocol::{MemoryRange, MemoryRangeTable}; use vm_migration::{ @@ -2091,48 +2091,6 @@ impl MemoryManager { debug!("coredump total bytes {}", total_bytes); Ok(()) } - - pub fn receive_memory_regions( - &mut self, - ranges: &MemoryRangeTable, - fd: &mut F, - ) -> std::result::Result<(), MigratableError> - where - F: ReadVolatile, - { - let guest_memory = self.guest_memory(); - let mem = guest_memory.memory(); - - for range in ranges.regions() { - let mut offset: u64 = 0; - // Here we are manually handling the retry in case we can't the - // whole region at once because we can't use the implementation - // from vm-memory::GuestMemory of read_exact_from() as it is not - // following the correct behavior. For more info about this issue - // see: https://github.com/rust-vmm/vm-memory/issues/174 - loop { - let bytes_read = mem - .read_volatile_from( - GuestAddress(range.gpa + offset), - fd, - (range.length - offset) as usize, - ) - .map_err(|e| { - MigratableError::MigrateReceive(anyhow!( - "Error receiving memory from socket: {}", - e - )) - })?; - offset += bytes_read as u64; - - if offset == range.length { - break; - } - } - } - - Ok(()) - } } struct MemoryNotify { From 2e7a6a956211a26c0a9265c234659b2f879c3a32 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Thu, 16 Oct 2025 17:00:08 +0200 Subject: [PATCH 236/294] vmm: keep direct reference to guest memory around ... to avoid having to grab a lock when we receive a chunk of memory over the migration socket. This will come in handy when we have multiple threads for receiving memory. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 6cdbf66bbf..2e79cc467d 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -52,8 +52,8 @@ use thiserror::Error; use tracer::trace_scoped; use vm_memory::bitmap::{AtomicBitmap, BitmapSlice}; use vm_memory::{ - GuestAddress, GuestAddressSpace, GuestMemory, ReadVolatile, VolatileMemoryError, VolatileSlice, - WriteVolatile, + GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic, ReadVolatile, + VolatileMemoryError, VolatileSlice, WriteVolatile, }; use vm_migration::protocol::*; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; @@ -741,8 +741,15 @@ enum ReceiveMigrationState { /// We received file descriptors for memory. 
This can only happen on UNIX domain sockets. MemoryFdsReceived(Vec<(u32, File)>), - /// We received the VM configuration. We keep the memory configuration around to populate guest memory. From this point on, the sender can start sending memory updates. - Configured(Arc>), + /// We received the VM configuration. We keep the memory configuration around to populate guest memory. + /// From this point on, the sender can start sending memory updates. + /// + /// While the memory manager can also be used to populate guest memory, we keep a direct reference to + /// the memory around to populate guest memory without having to acquire a lock. + Configured( + Arc>, + GuestMemoryAtomic, + ), /// Memory is populated and we received the state. The VM is ready to go. StateReceived, @@ -987,7 +994,8 @@ impl Vmm { } } - Ok(Configured(memory_manager)) + let guest_memory = memory_manager.lock().unwrap().guest_memory(); + Ok(Configured(memory_manager, guest_memory)) }; let recv_memory_fd = @@ -1019,13 +1027,13 @@ impl Vmm { Command::Config => configure_vm(socket, HashMap::from_iter(memory_files)), _ => invalid_command(), }, - Configured(memory_manager) => match req.command() { + Configured(memory_manager, guest_memory) => match req.command() { Command::Memory => { - self.vm_receive_memory(req, socket, &mut memory_manager.lock().unwrap())?; - Ok(Configured(memory_manager)) + self.vm_receive_memory(req, socket, &guest_memory)?; + Ok(Configured(memory_manager, guest_memory)) } Command::State => { - self.vm_receive_state(req, socket, memory_manager.clone())?; + self.vm_receive_state(req, socket, memory_manager)?; Ok(StateReceived) } _ => invalid_command(), @@ -1214,14 +1222,14 @@ impl Vmm { &mut self, req: &Request, socket: &mut T, - memory_manager: &mut MemoryManager, + guest_mem: &GuestMemoryAtomic, ) -> std::result::Result<(), MigratableError> where T: Read + ReadVolatile, { // Read table let ranges = MemoryRangeTable::read_from(socket, req.length())?; - let mem = memory_manager.guest_memory().memory(); + let mem = guest_mem.memory(); for range in ranges.regions() { let mut offset: u64 = 0; From 452d3f916137e6415a77f53ef04f9e4379004edc Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Fri, 17 Oct 2025 15:24:10 +0200 Subject: [PATCH 237/294] vmm: allow keeping the socket listener around To allow for accepting more connections in the migration receive code paths, we need to keep track of the listener. This commit adds a thin abstraction to be able to hold on to it regardless of whether it is a UNIX domain or TCP socket. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 83 +++++++++++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 2e79cc467d..da672ff4d4 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -730,6 +730,40 @@ pub struct Vmm { console_info: Option, } +/// Abstract over the different types of listeners that can be used to receive connections. +#[derive(Debug)] +enum ReceiveListener { + Tcp(TcpListener), + Unix(UnixListener, Option), +} + +impl ReceiveListener { + /// Block until a connection is accepted. 
+ fn accept(&mut self) -> std::result::Result { + match self { + ReceiveListener::Tcp(listener) => listener + .accept() + .map(|(socket, _)| SocketStream::Tcp(socket)), + ReceiveListener::Unix(listener, opt_path) => { + let socket = listener + .accept() + .map(|(socket, _)| SocketStream::Unix(socket))?; + + // Remove the UNIX socket file after accepting the connection. Is this actually safe? If a user + // moves the file and creates a new one with the same name, we will delete the wrong file. + // Sounds like a confused deputy to me. + // + // TODO Don't do this? + if let Some(path) = opt_path.take() { + std::fs::remove_file(&path)?; + } + + Ok(socket) + } + } + } +} + /// The receiver's state machine behind the migration protocol. enum ReceiveMigrationState { /// The connection is established and we haven't received any commands yet. @@ -1293,41 +1327,22 @@ impl Vmm { } } - fn receive_migration_socket( + fn receive_migration_listener( receiver_url: &str, - ) -> std::result::Result { + ) -> std::result::Result { if let Some(address) = receiver_url.strip_prefix("tcp:") { - let listener = TcpListener::bind(address).map_err(|e| { - MigratableError::MigrateReceive(anyhow!("Error binding to TCP socket: {}", e)) - })?; - - let (socket, _addr) = listener.accept().map_err(|e| { - MigratableError::MigrateReceive(anyhow!( - "Error accepting connection on TCP socket: {}", - e - )) - })?; - - Ok(SocketStream::Tcp(socket)) + TcpListener::bind(address) + .map_err(|e| { + MigratableError::MigrateReceive(anyhow!("Error binding to TCP socket: {}", e)) + }) + .map(ReceiveListener::Tcp) } else { let path = Vmm::socket_url_to_path(receiver_url)?; - let listener = UnixListener::bind(&path).map_err(|e| { - MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) - })?; - - let (socket, _addr) = listener.accept().map_err(|e| { - MigratableError::MigrateReceive(anyhow!( - "Error accepting connection on UNIX socket: {}", - e - )) - })?; - - // Remove the UNIX socket file after accepting the connection - std::fs::remove_file(&path).map_err(|e| { - MigratableError::MigrateReceive(anyhow!("Error removing UNIX socket file: {}", e)) - })?; - - Ok(SocketStream::Unix(socket)) + UnixListener::bind(&path) + .map_err(|e| { + MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) + }) + .map(|listener| ReceiveListener::Unix(listener, Some(path))) } } @@ -2574,8 +2589,12 @@ impl RequestHandler for Vmm { receive_data_migration.receiver_url, &receive_data_migration.net_fds ); + let mut listener = Vmm::receive_migration_listener(&receive_data_migration.receiver_url)?; // Accept the connection and get the socket - let mut socket = Vmm::receive_migration_socket(&receive_data_migration.receiver_url)?; + let mut socket = listener.accept().map_err(|e| { + warn!("Failed to accept migration connection: {}", e); + MigratableError::MigrateReceive(anyhow!("Failed to accept migration connection: {}", e)) + })?; let mut state = ReceiveMigrationState::Established; From 48a9f7e735545a23d1e3fe0316a7c2434ee18051 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Mon, 20 Oct 2025 17:55:36 +0200 Subject: [PATCH 238/294] vmm: handle receiving migration data on additional connections We keep the listening socket around and accept as many connections as the sender wants to open. There are still some problems: We never tear these threads down again. We will handle this in subsequent commits. 
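Purely as a sketch of one possible direction for the teardown problem (this is an assumption, not what this series implements), an accept loop can be made stoppable by putting the listener into non-blocking mode and polling a shared stop flag.

    // Sketch: a stoppable accept loop on a non-blocking listener.
    use std::io::ErrorKind;
    use std::net::TcpListener;
    use std::sync::Arc;
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::thread;
    use std::time::Duration;

    fn main() -> std::io::Result<()> {
        let listener = TcpListener::bind("127.0.0.1:0")?;
        listener.set_nonblocking(true)?;

        let stop = Arc::new(AtomicBool::new(false));
        let stop_flag = stop.clone();

        let accept_thread = thread::spawn(move || {
            while !stop_flag.load(Ordering::SeqCst) {
                match listener.accept() {
                    Ok((_socket, _addr)) => { /* hand the connection to a worker */ }
                    Err(e) if e.kind() == ErrorKind::WouldBlock => {
                        // Nothing pending: back off briefly and re-check the flag.
                        thread::sleep(Duration::from_millis(10));
                    }
                    Err(_) => break,
                }
            }
        });

        // Teardown: flip the flag and join the thread.
        stop.store(true, Ordering::SeqCst);
        accept_thread.join().unwrap();
        Ok(())
    }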
On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 209 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 154 insertions(+), 55 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index da672ff4d4..730c2f24a5 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -762,6 +762,128 @@ impl ReceiveListener { } } } + + fn try_clone(&self) -> std::result::Result { + match self { + ReceiveListener::Tcp(listener) => listener.try_clone().map(ReceiveListener::Tcp), + ReceiveListener::Unix(listener, opt_path) => listener + .try_clone() + .map(|listener| ReceiveListener::Unix(listener, opt_path.clone())), + } + } +} + +/// Handles a `Memory` request by writing its payload to the VM memory. +fn vm_receive_memory( + req: &Request, + socket: &mut T, + guest_mem: &GuestMemoryAtomic, +) -> std::result::Result<(), MigratableError> +where + T: Read + ReadVolatile, +{ + assert_eq!(req.command(), Command::Memory); + + // Read table + let ranges = MemoryRangeTable::read_from(socket, req.length())?; + let mem = guest_mem.memory(); + + for range in ranges.regions() { + let mut offset: u64 = 0; + // Here we are manually handling the retry in case we can't the + // whole region at once because we can't use the implementation + // from vm-memory::GuestMemory of read_exact_from() as it is not + // following the correct behavior. For more info about this issue + // see: https://github.com/rust-vmm/vm-memory/issues/174 + loop { + let bytes_read = mem + .read_volatile_from( + GuestAddress(range.gpa + offset), + socket, + (range.length - offset) as usize, + ) + .map_err(|e| { + MigratableError::MigrateReceive(anyhow!( + "Error receiving memory from socket: {}", + e + )) + })?; + offset += bytes_read as u64; + + if offset == range.length { + break; + } + } + } + + Ok(()) +} + +/// We keep track of additional connections for receiving VM migration data +/// here. +struct ReceiveAdditionalConnections { + accept_thread: std::thread::JoinHandle<()>, +} + +impl ReceiveAdditionalConnections { + /// Starts a thread to accept incoming connections and handle them. These + /// additional connections are used to receive additional memory regions + /// during VM migration. + fn new( + listener: ReceiveListener, + guest_memory: GuestMemoryAtomic, + ) -> std::result::Result { + let accept_thread = std::thread::spawn(move || { + let mut listener = listener; + let mut threads: Vec> = Vec::new(); + while let Ok(mut socket) = listener.accept() { + let guest_memory = guest_memory.clone(); + + // We handle errors locally and log them. Passing them along is + // painful with little value. + threads.push(std::thread::spawn(move || { + loop { + let req = match Request::read_from(&mut socket) { + Ok(req) => req, + Err(e) => { + error!("Failed to read request: {}", e); + break; + } + }; + + if req.command() != Command::Memory { + error!("Dropping connection. Only Memory commands are allowed on additional connections"); + break; + } + + if let Err(e) = vm_receive_memory(&req, &mut socket, &guest_memory) { + error!("Failed to receive memory: {}", e); + break; + } + } + })); + } + + info!("Stopped accepting additional connections. Cleaning up threads."); + threads.into_iter().for_each(|thread| { + thread.join().unwrap(); + }); + }); + + Ok(Self { accept_thread }) + } +} + +impl Drop for ReceiveAdditionalConnections { + fn drop(&mut self) { + // TODO Here we should make sure to shut down the accept thread before + // joining it. 
This is currently not so easy, because we don't have a + // way to signal the thread to stop accepting connections. + + if !self.accept_thread.is_finished() { + warn!("Accept thread is still running"); + } + } } /// The receiver's state machine behind the migration protocol. @@ -783,6 +905,7 @@ enum ReceiveMigrationState { Configured( Arc>, GuestMemoryAtomic, + ReceiveAdditionalConnections, ), /// Memory is populated and we received the state. The VM is ready to go. @@ -981,6 +1104,7 @@ impl Vmm { /// _not_ write any response to the socket. fn vm_receive_migration_step( &mut self, + listener: &ReceiveListener, socket: &mut SocketStream, state: ReceiveMigrationState, req: &Request, @@ -1029,7 +1153,19 @@ impl Vmm { } let guest_memory = memory_manager.lock().unwrap().guest_memory(); - Ok(Configured(memory_manager, guest_memory)) + Ok(Configured( + memory_manager, + guest_memory.clone(), + listener + .try_clone() + .and_then(|l| ReceiveAdditionalConnections::new(l, guest_memory)) + .map_err(|e| { + MigratableError::MigrateReceive(anyhow!( + "Failed to create receive additional connections: {}", + e + )) + })?, + )) }; let recv_memory_fd = @@ -1061,17 +1197,23 @@ impl Vmm { Command::Config => configure_vm(socket, HashMap::from_iter(memory_files)), _ => invalid_command(), }, - Configured(memory_manager, guest_memory) => match req.command() { - Command::Memory => { - self.vm_receive_memory(req, socket, &guest_memory)?; - Ok(Configured(memory_manager, guest_memory)) - } - Command::State => { - self.vm_receive_state(req, socket, memory_manager)?; - Ok(StateReceived) + Configured(memory_manager, guest_memory, receive_additional_connections) => { + match req.command() { + Command::Memory => { + vm_receive_memory(req, socket, &guest_memory)?; + Ok(Configured( + memory_manager, + guest_memory, + receive_additional_connections, + )) + } + Command::State => { + self.vm_receive_state(req, socket, memory_manager)?; + Ok(StateReceived) + } + _ => invalid_command(), } - _ => invalid_command(), - }, + } StateReceived => match req.command() { Command::Complete => { // The unwrap is safe, because the state machine makes sure we called @@ -1252,50 +1394,6 @@ impl Vmm { Ok(()) } - fn vm_receive_memory( - &mut self, - req: &Request, - socket: &mut T, - guest_mem: &GuestMemoryAtomic, - ) -> std::result::Result<(), MigratableError> - where - T: Read + ReadVolatile, - { - // Read table - let ranges = MemoryRangeTable::read_from(socket, req.length())?; - let mem = guest_mem.memory(); - - for range in ranges.regions() { - let mut offset: u64 = 0; - // Here we are manually handling the retry in case we can't the - // whole region at once because we can't use the implementation - // from vm-memory::GuestMemory of read_exact_from() as it is not - // following the correct behavior. 
For more info about this issue - // see: https://github.com/rust-vmm/vm-memory/issues/174 - loop { - let bytes_read = mem - .read_volatile_from( - GuestAddress(range.gpa + offset), - socket, - (range.length - offset) as usize, - ) - .map_err(|e| { - MigratableError::MigrateReceive(anyhow!( - "Error receiving memory from socket: {}", - e - )) - })?; - offset += bytes_read as u64; - - if offset == range.length { - break; - } - } - } - - Ok(()) - } - fn socket_url_to_path(url: &str) -> result::Result { url.strip_prefix("unix:") .ok_or_else(|| { @@ -2603,6 +2701,7 @@ impl RequestHandler for Vmm { trace!("Command {:?} received", req.command()); let (response, new_state) = match self.vm_receive_migration_step( + &listener, &mut socket, state, &req, From fca89bb06cae3e19d1f20ee4b8834e96fb3d623c Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Mon, 20 Oct 2025 18:33:28 +0200 Subject: [PATCH 239/294] vmm: use the same function to send memory In anticipation of using multiple threads for sending memory, refactor the sending code to be in a single place. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 730c2f24a5..99faa8ef4a 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -1444,27 +1444,27 @@ impl Vmm { } } + // Send memory from the given table. // Returns true if there were dirty pages to send - fn vm_maybe_send_dirty_pages( + fn vm_send_memory( vm: &mut Vm, socket: &mut SocketStream, - table: MemoryRangeTable, - ) -> result::Result { - // But if there are no regions go straight to pause + table: &MemoryRangeTable, + ) -> result::Result<(), MigratableError> { if table.regions().is_empty() { - return Ok(false); + return Ok(()); } Request::memory(table.length()).write_to(socket).unwrap(); table.write_to(socket)?; // And then the memory itself - vm.send_memory_regions(&table, socket)?; + vm.send_memory_regions(table, socket)?; Response::read_from(socket)?.ok_or_abandon( socket, MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), )?; - Ok(true) + Ok(()) } fn can_increase_autoconverge_step(s: &MigrationState) -> bool { @@ -1543,7 +1543,7 @@ impl Vmm { // Send the current dirty pages let transfer_start = Instant::now(); - Self::vm_maybe_send_dirty_pages(vm, socket, iteration_table.clone())?; + Self::vm_send_memory(vm, socket, &iteration_table)?; let transfer_time = transfer_start.elapsed().as_millis() as f64; // Update bandwidth @@ -1579,16 +1579,7 @@ impl Vmm { // Start logging dirty pages vm.start_dirty_log()?; - // Send memory table - let table = vm.memory_range_table()?; - Request::memory(table.length()).write_to(socket).unwrap(); - table.write_to(socket)?; - // And then the memory itself - vm.send_memory_regions(&table, socket)?; - Response::read_from(socket)?.ok_or_abandon( - socket, - MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), - )?; + Self::vm_send_memory(vm, socket, &vm.memory_range_table()?)?; // Define the maximum allowed downtime 2000 seconds(2000000 milliseconds) const MAX_MIGRATE_DOWNTIME: u64 = 2000000; @@ -1630,7 +1621,7 @@ impl Vmm { // Send last batch of dirty pages let mut final_table = vm.dirty_log()?; final_table.extend(iteration_table.clone()); - Self::vm_maybe_send_dirty_pages(vm, socket, final_table.clone())?; + Self::vm_send_memory(vm, socket, &final_table)?; // Update statistics s.pending_size = 
final_table.regions().iter().map(|range| range.length).sum(); From 61544a6b471c14e3038b284c3a95f74922e8166d Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Tue, 21 Oct 2025 14:14:51 +0200 Subject: [PATCH 240/294] vmm: move socket creation out of impl Vmm ... to be able to re-use it when establishing multiple send connections. I moved the receive socket creation out for symmetry. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 104 ++++++++++++++++++++++++------------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 99faa8ef4a..0c200fc07e 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -927,6 +927,56 @@ impl ReceiveMigrationState { } } +/// Establishes a connection to a migration destination socket (TCP or UNIX). +fn send_migration_socket( + destination_url: &str, +) -> std::result::Result { + if let Some(address) = destination_url.strip_prefix("tcp:") { + info!("Connecting to TCP socket at {}", address); + + let socket = TcpStream::connect(address).map_err(|e| { + MigratableError::MigrateSend(anyhow!("Error connecting to TCP socket: {}", e)) + })?; + + Ok(SocketStream::Tcp(socket)) + } else if let Some(path) = destination_url.strip_prefix("unix:") { + info!("Connecting to UNIX socket at {:?}", path); + + let socket = UnixStream::connect(path).map_err(|e| { + MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e)) + })?; + + Ok(SocketStream::Unix(socket)) + } else { + Err(MigratableError::MigrateSend(anyhow!( + "Invalid destination: {destination_url}" + ))) + } +} + +/// Creates a listener socket for receiving incoming migration connections (TCP or UNIX). +fn receive_migration_listener( + receiver_url: &str, +) -> std::result::Result { + if let Some(address) = receiver_url.strip_prefix("tcp:") { + TcpListener::bind(address) + .map_err(|e| { + MigratableError::MigrateReceive(anyhow!("Error binding to TCP socket: {}", e)) + }) + .map(ReceiveListener::Tcp) + } else if let Some(path) = receiver_url.strip_prefix("unix:") { + UnixListener::bind(path) + .map_err(|e| { + MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) + }) + .map(|listener| ReceiveListener::Unix(listener, Some(path.into()))) + } else { + Err(MigratableError::MigrateSend(anyhow!( + "Invalid source: {receiver_url}" + ))) + } +} + impl Vmm { pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; @@ -1394,56 +1444,6 @@ impl Vmm { Ok(()) } - fn socket_url_to_path(url: &str) -> result::Result { - url.strip_prefix("unix:") - .ok_or_else(|| { - MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url)) - }) - .map(|s| s.into()) - } - - fn send_migration_socket( - destination_url: &str, - ) -> std::result::Result { - if let Some(address) = destination_url.strip_prefix("tcp:") { - info!("Connecting to TCP socket at {}", address); - - let socket = TcpStream::connect(address).map_err(|e| { - MigratableError::MigrateSend(anyhow!("Error connecting to TCP socket: {}", e)) - })?; - - Ok(SocketStream::Tcp(socket)) - } else { - let path = Vmm::socket_url_to_path(destination_url)?; - info!("Connecting to UNIX socket at {:?}", path); - - let socket = UnixStream::connect(&path).map_err(|e| { - MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e)) - })?; - - Ok(SocketStream::Unix(socket)) - } - } - - fn receive_migration_listener( - receiver_url: &str, - ) -> std::result::Result { - if let Some(address) = 
receiver_url.strip_prefix("tcp:") { - TcpListener::bind(address) - .map_err(|e| { - MigratableError::MigrateReceive(anyhow!("Error binding to TCP socket: {}", e)) - }) - .map(ReceiveListener::Tcp) - } else { - let path = Vmm::socket_url_to_path(receiver_url)?; - UnixListener::bind(&path) - .map_err(|e| { - MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) - }) - .map(|listener| ReceiveListener::Unix(listener, Some(path))) - } - } - // Send memory from the given table. // Returns true if there were dirty pages to send fn vm_send_memory( @@ -1645,7 +1645,7 @@ impl Vmm { let mut s = MigrationState::new(); // Set up the socket connection - let mut socket = Self::send_migration_socket(&send_data_migration.destination_url)?; + let mut socket = send_migration_socket(&send_data_migration.destination_url)?; // Start the migration Request::start().write_to(&mut socket)?; @@ -2678,7 +2678,7 @@ impl RequestHandler for Vmm { receive_data_migration.receiver_url, &receive_data_migration.net_fds ); - let mut listener = Vmm::receive_migration_listener(&receive_data_migration.receiver_url)?; + let mut listener = receive_migration_listener(&receive_data_migration.receiver_url)?; // Accept the connection and get the socket let mut socket = listener.accept().map_err(|e| { warn!("Failed to accept migration connection: {}", e); From 00ea9893a83f6c2ccf5c6fd271530b2f2d6c4d1a Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Tue, 21 Oct 2025 15:05:25 +0200 Subject: [PATCH 241/294] vmm: move sending memory to the other migration code ... to simplify sending memory from multiple connections in future commits. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 90 +++++++++++++++++++++++++++++++++++--------------- vmm/src/vm.rs | 46 ++------------------------ 2 files changed, 67 insertions(+), 69 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 0c200fc07e..a085ec2a24 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -977,6 +977,67 @@ fn receive_migration_listener( } } +fn send_memory_regions( + guest_memory: &GuestMemoryAtomic, + ranges: &MemoryRangeTable, + fd: &mut SocketStream, +) -> std::result::Result<(), MigratableError> { + let mem = guest_memory.memory(); + + for range in ranges.regions() { + let mut offset: u64 = 0; + // Here we are manually handling the retry in case we can't the + // whole region at once because we can't use the implementation + // from vm-memory::GuestMemory of write_all_to() as it is not + // following the correct behavior. For more info about this issue + // see: https://github.com/rust-vmm/vm-memory/issues/174 + loop { + let bytes_written = mem + .write_volatile_to( + GuestAddress(range.gpa + offset), + fd, + (range.length - offset) as usize, + ) + .map_err(|e| { + MigratableError::MigrateSend(anyhow!( + "Error transferring memory to socket: {}", + e + )) + })?; + offset += bytes_written as u64; + + if offset == range.length { + break; + } + } + } + + Ok(()) +} + +// Send memory from the given table. 
+// Returns true if there were dirty pages to send +fn vm_send_memory( + guest_memory: &GuestMemoryAtomic, + socket: &mut SocketStream, + table: &MemoryRangeTable, +) -> result::Result<(), MigratableError> { + if table.regions().is_empty() { + return Ok(()); + } + + Request::memory(table.length()).write_to(socket).unwrap(); + table.write_to(socket)?; + // And then the memory itself + send_memory_regions(guest_memory, table, socket)?; + Response::read_from(socket)?.ok_or_abandon( + socket, + MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), + )?; + + Ok(()) +} + impl Vmm { pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; @@ -1444,29 +1505,6 @@ impl Vmm { Ok(()) } - // Send memory from the given table. - // Returns true if there were dirty pages to send - fn vm_send_memory( - vm: &mut Vm, - socket: &mut SocketStream, - table: &MemoryRangeTable, - ) -> result::Result<(), MigratableError> { - if table.regions().is_empty() { - return Ok(()); - } - - Request::memory(table.length()).write_to(socket).unwrap(); - table.write_to(socket)?; - // And then the memory itself - vm.send_memory_regions(table, socket)?; - Response::read_from(socket)?.ok_or_abandon( - socket, - MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), - )?; - - Ok(()) - } - fn can_increase_autoconverge_step(s: &MigrationState) -> bool { if s.iteration < AUTO_CONVERGE_ITERATION_DELAY { false @@ -1543,7 +1581,7 @@ impl Vmm { // Send the current dirty pages let transfer_start = Instant::now(); - Self::vm_send_memory(vm, socket, &iteration_table)?; + vm_send_memory(&vm.guest_memory(), socket, &iteration_table)?; let transfer_time = transfer_start.elapsed().as_millis() as f64; // Update bandwidth @@ -1579,7 +1617,7 @@ impl Vmm { // Start logging dirty pages vm.start_dirty_log()?; - Self::vm_send_memory(vm, socket, &vm.memory_range_table()?)?; + vm_send_memory(&vm.guest_memory(), socket, &vm.memory_range_table()?)?; // Define the maximum allowed downtime 2000 seconds(2000000 milliseconds) const MAX_MIGRATE_DOWNTIME: u64 = 2000000; @@ -1621,7 +1659,7 @@ impl Vmm { // Send last batch of dirty pages let mut final_table = vm.dirty_log()?; final_table.extend(iteration_table.clone()); - Self::vm_send_memory(vm, socket, &final_table)?; + vm_send_memory(&vm.guest_memory(), socket, &final_table)?; // Update statistics s.pending_size = final_table.regions().iter().map(|range| range.length).sum(); diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 6007b8add0..6e325d9c46 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -64,9 +64,7 @@ use tracer::trace_scoped; use vm_device::Bus; #[cfg(feature = "tdx")] use vm_memory::{Address, ByteValued, GuestMemoryRegion, ReadVolatile}; -use vm_memory::{ - Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic, WriteVolatile, -}; +use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic}; use vm_migration::protocol::{MemoryRangeTable, Request, Response}; use vm_migration::{ Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, snapshot_from_id, @@ -2604,46 +2602,8 @@ impl Vm { Ok(()) } - pub fn send_memory_regions( - &mut self, - ranges: &MemoryRangeTable, - fd: &mut F, - ) -> std::result::Result<(), MigratableError> - where - F: WriteVolatile, - { - let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory(); - let mem = guest_memory.memory(); - - for range in ranges.regions() { - let mut offset: u64 = 0; - // Here we are manually handling the retry in case we 
can't the - // whole region at once because we can't use the implementation - // from vm-memory::GuestMemory of write_all_to() as it is not - // following the correct behavior. For more info about this issue - // see: https://github.com/rust-vmm/vm-memory/issues/174 - loop { - let bytes_written = mem - .write_volatile_to( - GuestAddress(range.gpa + offset), - fd, - (range.length - offset) as usize, - ) - .map_err(|e| { - MigratableError::MigrateSend(anyhow!( - "Error transferring memory to socket: {}", - e - )) - })?; - offset += bytes_written as u64; - - if offset == range.length { - break; - } - } - } - - Ok(()) + pub fn guest_memory(&self) -> GuestMemoryAtomic { + self.memory_manager.lock().unwrap().guest_memory() } pub fn memory_range_table(&self) -> std::result::Result { From 8ad0d292065313aee11dcc528833ec4f0de12605 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Tue, 21 Oct 2025 16:38:28 +0200 Subject: [PATCH 242/294] vm-migration: allow partitioning memory tables For sending memory over multiple connections, we need a way to split up the work. With these changes, we can take a memory table and chop it into same-sized chunks for transmit. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vm-migration/src/protocol.rs | 136 ++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 2 deletions(-) diff --git a/vm-migration/src/protocol.rs b/vm-migration/src/protocol.rs index 2eeb155927..68684b4781 100644 --- a/vm-migration/src/protocol.rs +++ b/vm-migration/src/protocol.rs @@ -211,7 +211,7 @@ impl Response { } #[repr(C)] -#[derive(Clone, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] pub struct MemoryRange { pub gpa: u64, pub length: u64, @@ -244,12 +244,85 @@ impl MemoryRange { } } -#[derive(Clone, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct MemoryRangeTable { data: Vec, } +#[derive(Debug, Clone, Default)] +struct MemoryRangeTableIterator { + chunk_size: u64, + data: Vec, +} + +impl MemoryRangeTableIterator { + pub fn new(table: &MemoryRangeTable, chunk_size: u64) -> Self { + MemoryRangeTableIterator { + chunk_size, + data: table.data.clone(), + } + } +} + +impl Iterator for MemoryRangeTableIterator { + type Item = MemoryRangeTable; + + /// Return the next memory range in the table, making sure that + /// the returned range is not larger than `chunk_size`. + /// + /// **Note**: Do not rely on the order of the ranges returned by this + /// iterator. This allows for a more efficient implementation. + fn next(&mut self) -> Option { + let mut ranges: Vec = vec![]; + let mut ranges_size: u64 = 0; + + loop { + assert!(ranges_size <= self.chunk_size); + + if ranges_size == self.chunk_size || self.data.is_empty() { + break; + } + + if let Some(range) = self.data.pop() { + let next_range: MemoryRange = if ranges_size + range.length > self.chunk_size { + // How many bytes we need to put back into the table. 
+ let leftover_bytes = ranges_size + range.length - self.chunk_size; + assert!(leftover_bytes <= range.length); + let returned_bytes = range.length - leftover_bytes; + assert!(returned_bytes <= range.length); + assert!(leftover_bytes + returned_bytes == range.length); + + self.data.push(MemoryRange { + gpa: range.gpa + returned_bytes, + length: leftover_bytes, + }); + MemoryRange { + gpa: range.gpa, + length: returned_bytes, + } + } else { + range + }; + + ranges_size += next_range.length; + ranges.push(next_range); + } + } + + if ranges.is_empty() { + None + } else { + Some(MemoryRangeTable { data: ranges }) + } + } +} + impl MemoryRangeTable { + /// Partitions the table into chunks of at most `chunk_size` bytes. + pub fn partition(&self, chunk_size: u64) -> impl Iterator { + MemoryRangeTableIterator::new(self, chunk_size) + } + pub fn from_bitmap( bitmap: impl IntoIterator, start_addr: u64, @@ -316,3 +389,62 @@ impl MemoryRangeTable { Self { data } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_memory_range_table() { + let mut table = MemoryRangeTable::default(); + // Test blocks that are shorter than the chunk size. + table.push(MemoryRange { + gpa: 0, + length: 1 << 10, + }); + // Test blocks that are longer than the chunk size. + table.push(MemoryRange { + gpa: 0x1000, + length: 3 << 20, + }); + // And add another blocks, so we get a chunk that spans two memory + // ranges. + table.push(MemoryRange { + gpa: 4 << 20, + length: 1 << 20, + }); + + let table = table; // drop mut + + let chunks = table + .partition(2 << 20) + .map(|table| table.data) + .collect::>(); + + // The implementation currently returns the ranges in reverse order. If + // this tests becomes more complex, we can compare everything as sets. + assert_eq!( + chunks, + vec![ + vec![ + MemoryRange { + gpa: 4 << 20, + length: 1 << 20 + }, + MemoryRange { + gpa: 0x1000, + length: 1 << 20 + } + ], + vec![MemoryRange { + gpa: 0x1000 + (1 << 20), + length: 2 << 20 + },], + vec![MemoryRange { + gpa: 0, + length: 1 << 10 + }] + ] + ); + } +} From 6d6547622ea361ab73e6d115c7bcfcce9c05d822 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Tue, 21 Oct 2025 15:48:40 +0200 Subject: [PATCH 243/294] vmm: funnel memory data via additional connections abstraction This does not actually use the additional connections yet, but we are getting closer! On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 143 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 114 insertions(+), 29 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index a085ec2a24..34eac3e5dc 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -21,6 +21,7 @@ use std::collections::HashMap; use std::fs::File; use std::io::{Read, Write, stdout}; use std::net::{TcpListener, TcpStream}; +use std::num::NonZeroU32; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::os::unix::net::{UnixListener, UnixStream}; use std::panic::AssertUnwindSafe; @@ -927,6 +928,101 @@ impl ReceiveMigrationState { } } +/// This struct keeps track of additional threads we use to send VM memory. +struct SendAdditionalConnections { + guest_memory: GuestMemoryAtomic, + threads: Vec>, + channels: Vec>, +} + +/// Send memory from the given table. 
+fn vm_send_memory( + guest_memory: &GuestMemoryAtomic, + socket: &mut SocketStream, + table: &MemoryRangeTable, +) -> result::Result<(), MigratableError> { + if table.regions().is_empty() { + return Ok(()); + } + + Request::memory(table.length()).write_to(socket).unwrap(); + table.write_to(socket)?; + // And then the memory itself + send_memory_regions(guest_memory, table, socket)?; + Response::read_from(socket)?.ok_or_abandon( + socket, + MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), + )?; + + Ok(()) +} + +impl SendAdditionalConnections { + fn new( + destination: &str, + connections: NonZeroU32, + guest_mem: &GuestMemoryAtomic, + ) -> std::result::Result { + let mut threads = Vec::new(); + let mut channels = Vec::new(); + + for _ in 0..(connections.get() - 1) { + let socket = send_migration_socket(destination)?; + let guest_mem = guest_mem.clone(); + let (send, recv) = std::sync::mpsc::channel::(); + + let thread = thread::spawn(move || { + let mut socket = socket; + while let Ok(table) = recv.recv() { + vm_send_memory(&guest_mem, &mut socket, &table).unwrap() + } + }); + + threads.push(thread); + channels.push(send); + } + + Ok(Self { + guest_memory: guest_mem.clone(), + threads, + channels, + }) + } + + /// Send memory via all connections that we have. This may be just one. + /// `socket` is the original socket that was used to connect to the + /// destination. + fn send_memory( + &self, + table: &MemoryRangeTable, + socket: &mut SocketStream, + ) -> std::result::Result<(), MigratableError> { + warn!("Not sending via multiple connections yet"); + + Request::memory(table.length()).write_to(socket).unwrap(); + table.write_to(socket)?; + // And then the memory itself + send_memory_regions(&self.guest_memory, table, socket)?; + Response::read_from(socket)?.ok_or_abandon( + socket, + MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), + )?; + + Ok(()) + } +} + +impl Drop for SendAdditionalConnections { + fn drop(&mut self) { + // Drop all channels, so the threads will exit. + self.channels.clear(); + + self.threads + .drain(..) + .for_each(|thread| thread.join().unwrap()); + } +} + /// Establishes a connection to a migration destination socket (TCP or UNIX). fn send_migration_socket( destination_url: &str, @@ -1015,29 +1111,6 @@ fn send_memory_regions( Ok(()) } -// Send memory from the given table. 
-// Returns true if there were dirty pages to send -fn vm_send_memory( - guest_memory: &GuestMemoryAtomic, - socket: &mut SocketStream, - table: &MemoryRangeTable, -) -> result::Result<(), MigratableError> { - if table.regions().is_empty() { - return Ok(()); - } - - Request::memory(table.length()).write_to(socket).unwrap(); - table.write_to(socket)?; - // And then the memory itself - send_memory_regions(guest_memory, table, socket)?; - Response::read_from(socket)?.ok_or_abandon( - socket, - MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), - )?; - - Ok(()) -} - impl Vmm { pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; @@ -1516,6 +1589,7 @@ impl Vmm { fn memory_copy_iterations( vm: &mut Vm, + mem_send: &SendAdditionalConnections, socket: &mut SocketStream, s: &mut MigrationState, migration_timeout: Duration, @@ -1581,7 +1655,7 @@ impl Vmm { // Send the current dirty pages let transfer_start = Instant::now(); - vm_send_memory(&vm.guest_memory(), socket, &iteration_table)?; + mem_send.send_memory(&iteration_table, socket)?; let transfer_time = transfer_start.elapsed().as_millis() as f64; // Update bandwidth @@ -1614,10 +1688,16 @@ impl Vmm { s: &mut MigrationState, send_data_migration: &VmSendMigrationData, ) -> result::Result<(), MigratableError> { + let mem_send = SendAdditionalConnections::new( + &send_data_migration.destination_url, + send_data_migration.connections, + &vm.guest_memory(), + )?; + // Start logging dirty pages vm.start_dirty_log()?; - vm_send_memory(&vm.guest_memory(), socket, &vm.memory_range_table()?)?; + mem_send.send_memory(&vm.memory_range_table()?, socket)?; // Define the maximum allowed downtime 2000 seconds(2000000 milliseconds) const MAX_MIGRATE_DOWNTIME: u64 = 2000000; @@ -1643,8 +1723,14 @@ impl Vmm { ))); } - let iteration_table = - Self::memory_copy_iterations(vm, socket, s, migration_timeout, migrate_downtime_limit)?; + let iteration_table = Self::memory_copy_iterations( + vm, + &mem_send, + socket, + s, + migration_timeout, + migrate_downtime_limit, + )?; info!("Entering downtime phase"); s.downtime_start = Instant::now(); @@ -1659,8 +1745,7 @@ impl Vmm { // Send last batch of dirty pages let mut final_table = vm.dirty_log()?; final_table.extend(iteration_table.clone()); - vm_send_memory(&vm.guest_memory(), socket, &final_table)?; - + mem_send.send_memory(&final_table, socket)?; // Update statistics s.pending_size = final_table.regions().iter().map(|range| range.length).sum(); s.total_transferred_bytes += s.pending_size; From cd18fab3cb0ae007226aa1fce1571a6ba0c45483 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Tue, 21 Oct 2025 17:36:26 +0200 Subject: [PATCH 244/294] vmm: actually send memory using multiple connections On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vm-migration/src/protocol.rs | 4 ++ vmm/src/lib.rs | 134 ++++++++++++++++++++++++++++++----- 2 files changed, 121 insertions(+), 17 deletions(-) diff --git a/vm-migration/src/protocol.rs b/vm-migration/src/protocol.rs index 68684b4781..f9cb3e4188 100644 --- a/vm-migration/src/protocol.rs +++ b/vm-migration/src/protocol.rs @@ -318,6 +318,10 @@ impl Iterator for MemoryRangeTableIterator { } impl MemoryRangeTable { + pub fn ranges(&self) -> &[MemoryRange] { + &self.data + } + /// Partitions the table into chunks of at most `chunk_size` bytes. 
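+    /// For example, a table holding ranges of 1 KiB, 3 MiB and 1 MiB split with
+    /// a `chunk_size` of 2 MiB yields three tables of 2 MiB, 2 MiB and 1 KiB
+    /// (see `test_memory_range_table` below). Chunks may be returned in any
+    /// order.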
pub fn partition(&self, chunk_size: u64) -> impl Iterator { MemoryRangeTableIterator::new(self, chunk_size) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 34eac3e5dc..0dac6d2de6 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -27,8 +27,8 @@ use std::os::unix::net::{UnixListener, UnixStream}; use std::panic::AssertUnwindSafe; use std::path::PathBuf; use std::rc::Rc; -use std::sync::mpsc::{Receiver, RecvError, SendError, Sender}; -use std::sync::{Arc, Mutex}; +use std::sync::mpsc::{Receiver, RecvError, SendError, Sender, TrySendError}; +use std::sync::{Arc, Barrier, Mutex}; #[cfg(not(target_arch = "riscv64"))] use std::time::{Duration, Instant}; use std::{io, result, thread}; @@ -861,6 +861,11 @@ impl ReceiveAdditionalConnections { error!("Failed to receive memory: {}", e); break; } + + if let Err(e) = Response::ok().write_to(&mut socket) { + error!("Failed to send response: {}", e); + break; + } } })); } @@ -928,11 +933,19 @@ impl ReceiveMigrationState { } } +/// The different kinds of messages we can send to memory sending threads. +#[derive(Debug)] +enum SendMemoryThreadMessage { + Memory(Arc), + Barrier(Arc), + Disconnect, +} + /// This struct keeps track of additional threads we use to send VM memory. struct SendAdditionalConnections { guest_memory: GuestMemoryAtomic, threads: Vec>, - channels: Vec>, + channels: Vec>, } /// Send memory from the given table. @@ -958,6 +971,27 @@ fn vm_send_memory( } impl SendAdditionalConnections { + /// How many requests can be waiting to be sent for each connection. This + /// can be set to zero to disable buffering. Whether we need to buffer + /// requests is currently unclear. If this is set too high, some connections + /// might go unused, because work pools up on some connections. + const BUFFERED_REQUESTS_PER_THREAD: usize = 1; + + /// The size of each chunk of memory to send. + /// + /// We want to make this large, because each chunk is acknowledged and we + /// wait for the ack before sending the next chunk. The challenge is that if + /// it is _too_ large, we become more sensitive to network issues, like + /// packet drops in individual connections, because large amounts of data + /// can pool when throughput on one connection is temporarily reduced. + /// + /// We can consider making this configurable, but a better network protocol + /// that doesn't require ACKs would be more efficient. 
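+    /// As a rough worked example of the estimate below (illustrative numbers,
+    /// not measurements): at 1 GiB/s per connection and a 1 ms round trip, a
+    /// 64 MiB chunk spends ~62.5 ms on the wire plus ~1 ms waiting for the
+    /// ACK, so roughly 98% of the link is used; a 1 MiB chunk under the same
+    /// assumptions reaches only about 50%.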
+ /// + /// The best-case throughput per connection can be estimated via: + /// effective_throughput = chunk_size / (chunk_size / throughput_per_connection + round_trip_time) + const CHUNK_SIZE: u64 = 64 /* MiB */ << 20; + fn new( destination: &str, connections: NonZeroU32, @@ -969,13 +1003,32 @@ impl SendAdditionalConnections { for _ in 0..(connections.get() - 1) { let socket = send_migration_socket(destination)?; let guest_mem = guest_mem.clone(); - let (send, recv) = std::sync::mpsc::channel::(); + let (send, recv) = std::sync::mpsc::sync_channel::( + Self::BUFFERED_REQUESTS_PER_THREAD, + ); let thread = thread::spawn(move || { + info!("Spawned thread to send VM memory."); + + let mut total_sent = 0; let mut socket = socket; - while let Ok(table) = recv.recv() { - vm_send_memory(&guest_mem, &mut socket, &table).unwrap() + + for msg in recv { + match msg { + SendMemoryThreadMessage::Memory(table) => { + vm_send_memory(&guest_mem, &mut socket, &table).unwrap(); + total_sent += + table.ranges().iter().map(|range| range.length).sum::(); + } + SendMemoryThreadMessage::Barrier(barrier) => { + barrier.wait(); + } + SendMemoryThreadMessage::Disconnect => { + break; + } + } } + info!("Sent {} MiB via additional connection.", total_sent >> 20); }); threads.push(thread); @@ -989,24 +1042,67 @@ impl SendAdditionalConnections { }) } + /// Wait until all data that is in-flight has actually been sent and acknowledged. + fn wait_for_pending_data(&self) { + assert_eq!(self.channels.len(), self.threads.len()); + + // TODO We don't actually need the threads to block at the barrier. We + // can probably find a better implementation that involves less + // synchronization. + + let barrier = Arc::new(Barrier::new(self.channels.len() + 1)); + + for channel in &self.channels { + channel + .send(SendMemoryThreadMessage::Barrier(barrier.clone())) + // The unwrap only fails fi + .unwrap(); + } + + barrier.wait(); + } + /// Send memory via all connections that we have. This may be just one. /// `socket` is the original socket that was used to connect to the /// destination. + /// + /// When this function returns, all memory has been sent and acknowledged. fn send_memory( &self, table: &MemoryRangeTable, socket: &mut SocketStream, ) -> std::result::Result<(), MigratableError> { - warn!("Not sending via multiple connections yet"); + let thread_len = self.threads.len(); + assert_eq!(thread_len, self.channels.len()); + + // The chunk size is chosen to be big enough so that even very fast + // links need some milliseconds to send it. + 'next_partition: for chunk in table.partition(Self::CHUNK_SIZE) { + let chunk = Arc::new(chunk); + + // Find the first free channel and send the chunk via it. + // + // TODO A better implementation wouldn't always start at the + // first thread, but go round-robin. + for channel in &self.channels { + match channel.try_send(SendMemoryThreadMessage::Memory(chunk.clone())) { + Ok(()) => continue 'next_partition, + Err(TrySendError::Full(_)) => { + // Try next channel. + } + Err(TrySendError::Disconnected(_)) => { + return Err(MigratableError::MigrateSend(anyhow!( + "Sending thread died?" + ))); + } + } + } - Request::memory(table.length()).write_to(socket).unwrap(); - table.write_to(socket)?; - // And then the memory itself - send_memory_regions(&self.guest_memory, table, socket)?; - Response::read_from(socket)?.ok_or_abandon( - socket, - MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), - )?; + // Fallback to sending the chunk via the control connection. 
+ vm_send_memory(&self.guest_memory, socket, &chunk)?; + } + + self.wait_for_pending_data(); Ok(()) } @@ -1014,12 +1110,16 @@ impl SendAdditionalConnections { impl Drop for SendAdditionalConnections { fn drop(&mut self) { - // Drop all channels, so the threads will exit. - self.channels.clear(); + info!("Sending disconnect message to channels"); + self.channels + .drain(..) + .for_each(|channel| channel.send(SendMemoryThreadMessage::Disconnect).unwrap()); + info!("Waiting for threads to finish"); self.threads .drain(..) .for_each(|thread| thread.join().unwrap()); + info!("Threads finished"); } } From 8eb9835495400289d9c1aa3a16b3b74bfb25ed70 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Tue, 28 Oct 2025 11:24:17 +0100 Subject: [PATCH 245/294] vmm: tear down accept thread after receiving This will stop us from listening for more connections on the TCP socket when migration has finished. Tearing down the individual connections will come in a subsequent commit. Co-authored-by: Philipp Schuster On-behalf-of: SAP julian.stecklina@sap.com On-behalf-of: SAP philipp.schuster@sap.com Signed-off-by: Julian Stecklina Signed-off-by: Philipp Schuster --- vmm/src/lib.rs | 106 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 96 insertions(+), 10 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 0dac6d2de6..901e945312 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -22,6 +22,7 @@ use std::fs::File; use std::io::{Read, Write, stdout}; use std::net::{TcpListener, TcpStream}; use std::num::NonZeroU32; +use std::os::fd::{AsFd, BorrowedFd}; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::os::unix::net::{UnixListener, UnixStream}; use std::panic::AssertUnwindSafe; @@ -731,6 +732,45 @@ pub struct Vmm { console_info: Option, } +/// Wait for a file descriptor to become readable. In this case, we return +/// true. In case, the eventfd was signaled, return false. +fn wait_for_readable( + fd: &impl AsFd, + eventfd: &EventFd, +) -> std::result::Result { + let fd_event = eventfd.as_raw_fd().as_raw_fd(); + let fd_io = fd.as_fd().as_raw_fd(); + let mut poll_fds = [ + libc::pollfd { + fd: fd_event, + events: libc::POLLIN, + revents: 0, + }, + libc::pollfd { + fd: fd_io, + events: libc::POLLIN, + revents: 0, + }, + ]; + + // SAFETY: This is safe, because the file descriptors are valid and the + // poll_fds array is properly initialized. + let ret = unsafe { libc::poll(poll_fds.as_mut_ptr(), poll_fds.len() as libc::nfds_t, -1) }; + + if ret < 0 { + return Err(std::io::Error::last_os_error()); + } + + if poll_fds[0].revents & libc::POLLIN != 0 { + return Ok(false); + } + if poll_fds[1].revents & libc::POLLIN != 0 { + return Ok(true); + } + + panic!("Poll returned, but neither file descriptor is readable?"); +} + /// Abstract over the different types of listeners that can be used to receive connections. #[derive(Debug)] enum ReceiveListener { @@ -738,6 +778,15 @@ enum ReceiveListener { Unix(UnixListener, Option), } +impl AsFd for ReceiveListener { + fn as_fd(&self) -> BorrowedFd<'_> { + match self { + ReceiveListener::Tcp(listener) => listener.as_fd(), + ReceiveListener::Unix(listener, _) => listener.as_fd(), + } + } +} + impl ReceiveListener { /// Block until a connection is accepted. fn accept(&mut self) -> std::result::Result { @@ -764,6 +813,16 @@ impl ReceiveListener { } } + /// Same as accept(), but returns None if the eventfd is signaled. 
+ fn abortable_accept( + &mut self, + eventfd: &EventFd, + ) -> std::result::Result, std::io::Error> { + wait_for_readable(&self, eventfd)? + .then(|| self.accept()) + .transpose() + } + fn try_clone(&self) -> std::result::Result { match self { ReceiveListener::Tcp(listener) => listener.try_clone().map(ReceiveListener::Tcp), @@ -823,10 +882,19 @@ where /// We keep track of additional connections for receiving VM migration data /// here. struct ReceiveAdditionalConnections { - accept_thread: std::thread::JoinHandle<()>, + terminate_fd: EventFd, + + // This is only an option to be able to join it in the destructor. + accept_thread: Option>, } impl ReceiveAdditionalConnections { + /// Create a pair of file descriptors that map to the same underlying event_fd. + fn event_fd_pair() -> std::result::Result<(EventFd, EventFd), std::io::Error> { + let event_fd = EventFd::new(0)?; + Ok((event_fd.try_clone()?, event_fd)) + } + /// Starts a thread to accept incoming connections and handle them. These /// additional connections are used to receive additional memory regions /// during VM migration. @@ -834,10 +902,13 @@ impl ReceiveAdditionalConnections { listener: ReceiveListener, guest_memory: GuestMemoryAtomic, ) -> std::result::Result { + let (terminate_fd1, terminate_fd2) = Self::event_fd_pair()?; + let accept_thread = std::thread::spawn(move || { + let terminate_fd = terminate_fd2; let mut listener = listener; let mut threads: Vec> = Vec::new(); - while let Ok(mut socket) = listener.accept() { + while let Ok(Some(mut socket)) = listener.abortable_accept(&terminate_fd) { let guest_memory = guest_memory.clone(); // We handle errors locally and log them. Passing them along is @@ -876,19 +947,34 @@ impl ReceiveAdditionalConnections { }); }); - Ok(Self { accept_thread }) + Ok(Self { + accept_thread: Some(accept_thread), + terminate_fd: terminate_fd1, + }) + } + + /// Stop accepting additional connections and tear down all connections. + /// + /// This function does not wait for the operation to complete. + fn signal_termination(&self) { + // It's not really worth propagating this error, because it only happens if + // something hit the fan and we can't really do anything about it. + if let Err(e) = self.terminate_fd.write(1) { + error!("Failed to wake up other threads: {}", e); + } } } impl Drop for ReceiveAdditionalConnections { fn drop(&mut self) { - // TODO Here we should make sure to shut down the accept thread before - // joining it. This is currently not so easy, because we don't have a - // way to signal the thread to stop accepting connections. - - if !self.accept_thread.is_finished() { - warn!("Accept thread is still running"); - } + self.signal_termination(); + // This unwrap is safe, because we never write a None into + // self.accept_thread in other places. + let _accept_thread = self.accept_thread.take().unwrap(); + + // TODO The accept thread tries to join all threads it started, but we + // haven't implemented tearing them down yet. + // accept_thread.join().unwrap(); } } From 4163a8061350bc91e406fe7f45dbd17bc39956d1 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Tue, 28 Oct 2025 13:19:19 +0100 Subject: [PATCH 246/294] vmm: tear down memory receive threads ... after the VM migration finishes. 
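As a rough sketch of the mechanism (illustrative only; the helper names below
are made up, and the EventFd type is assumed to be the vmm-sys-util one the
tree already uses, while the real code relies on wait_for_readable() and an
EventFd pair inside ReceiveAdditionalConnections): each receive thread polls
its socket together with an eventfd and exits once the eventfd is signalled.

    use std::os::fd::AsRawFd;

    use vmm_sys_util::eventfd::EventFd;

    /// Returns Ok(true) when the stop eventfd fired, Ok(false) when `io_fd`
    /// became readable.
    fn stop_requested(io_fd: &impl AsRawFd, stop: &EventFd) -> std::io::Result<bool> {
        let mut fds = [
            libc::pollfd { fd: stop.as_raw_fd(), events: libc::POLLIN, revents: 0 },
            libc::pollfd { fd: io_fd.as_raw_fd(), events: libc::POLLIN, revents: 0 },
        ];
        // SAFETY: both file descriptors are valid and the array is initialized.
        let ret = unsafe { libc::poll(fds.as_mut_ptr(), fds.len() as libc::nfds_t, -1) };
        if ret < 0 {
            return Err(std::io::Error::last_os_error());
        }
        Ok(fds[0].revents & libc::POLLIN != 0)
    }

    fn main() -> std::io::Result<()> {
        let stop = EventFd::new(0)?;
        let stop_clone = stop.try_clone()?;
        let listener = std::net::TcpListener::bind("127.0.0.1:0")?;
        let worker = std::thread::spawn(move || {
            // The real code accepts connections and serves Memory requests here.
            while let Ok(false) = stop_requested(&listener, &stop_clone) {}
        });
        stop.write(1)?; // Migration finished: ask the worker to stop.
        worker.join().unwrap();
        Ok(())
    }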
On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 91 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 24 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 901e945312..5c48a01224 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -19,7 +19,7 @@ const AUTO_CONVERGE_MAX: u8 = 99; use std::collections::HashMap; use std::fs::File; -use std::io::{Read, Write, stdout}; +use std::io::{ErrorKind, Read, Write, stdout}; use std::net::{TcpListener, TcpStream}; use std::num::NonZeroU32; use std::os::fd::{AsFd, BorrowedFd}; @@ -290,6 +290,15 @@ impl Write for SocketStream { } } +impl AsFd for SocketStream { + fn as_fd(&self) -> BorrowedFd<'_> { + match self { + SocketStream::Unix(s) => s.as_fd(), + SocketStream::Tcp(s) => s.as_fd(), + } + } +} + impl AsRawFd for SocketStream { fn as_raw_fd(&self) -> RawFd { match self { @@ -895,6 +904,56 @@ impl ReceiveAdditionalConnections { Ok((event_fd.try_clone()?, event_fd)) } + /// Handle incoming requests. + /// + /// For now we only handle `Command::Memory` requests here. Everything else + /// needs to come via the main connection. This function returns when the + /// abort_event_fd is triggered or the connection is closed or encountered + /// an error. + fn handle_requests( + socket: &mut SocketStream, + abort_event_fd: &EventFd, + guest_memory: &GuestMemoryAtomic, + ) -> std::result::Result<(), MigratableError> { + loop { + if !wait_for_readable(socket, abort_event_fd).map_err(|e| { + MigratableError::MigrateReceive(anyhow!("Failed to poll descriptors: {e}")) + })? { + info!("Got signal to tear down connection."); + return Ok(()); + } + + // TODO We only check whether we should abort when waiting for a new + // request. If the sender just stops sending data mid-request, we + // should still be abortable, but we are not... In this case, we + // will hang forever. But given that the sender is also in charge of + // driving the migration to completion, this is not a major concern. + // In the long run, it would be preferable to move I/O to + // asynchronous tasks to be able to handle aborts more gracefully. + + let req = match Request::read_from(socket) { + Ok(req) => req, + Err(MigratableError::MigrateSocket(io_error)) + if io_error.kind() == ErrorKind::UnexpectedEof => + { + debug!("Connection closed by peer"); + return Ok(()); + } + Err(e) => return Err(e), + }; + + if req.command() != Command::Memory { + return Err(MigratableError::MigrateReceive(anyhow!( + "Dropping connection. Only Memory commands are allowed on additional connections, but got {:?}", + req.command() + ))); + } + + vm_receive_memory(&req, socket, guest_memory)?; + Response::ok().write_to(socket)?; + } + } + /// Starts a thread to accept incoming connections and handle them. These /// additional connections are used to receive additional memory regions /// during VM migration. @@ -910,33 +969,17 @@ impl ReceiveAdditionalConnections { let mut threads: Vec> = Vec::new(); while let Ok(Some(mut socket)) = listener.abortable_accept(&terminate_fd) { let guest_memory = guest_memory.clone(); + let terminate_fd = terminate_fd.try_clone().unwrap(); // We handle errors locally and log them. Passing them along is // painful with little value. threads.push(std::thread::spawn(move || { - loop { - let req = match Request::read_from(&mut socket) { - Ok(req) => req, - Err(e) => { - error!("Failed to read request: {}", e); - break; - } - }; - - if req.command() != Command::Memory { - error!("Dropping connection. 
Only Memory commands are allowed on additional connections"); - break; - } - - if let Err(e) = vm_receive_memory(&req, &mut socket, &guest_memory) { - error!("Failed to receive memory: {}", e); - break; - } - - if let Err(e) = Response::ok().write_to(&mut socket) { - error!("Failed to send response: {}", e); - break; - } + if let Err(e) = Self::handle_requests(&mut socket, &terminate_fd, &guest_memory) + { + error!( + "Failed to read more requests on additional receive connection: {}", + e + ); } })); } From d94184708ed7311eaf96658c782ee7199764f754 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Wed, 5 Nov 2025 15:17:43 +0100 Subject: [PATCH 247/294] vmm: gracefully handle remote end not supporting multiple connections We don't die if we don't manage to establish more than the initial connection to the other side. To limit the weird failure cases, we do die when the other side only accepts some connections, but not all. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 5c48a01224..9cfcc4c9db 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -1129,8 +1129,19 @@ impl SendAdditionalConnections { let mut threads = Vec::new(); let mut channels = Vec::new(); - for _ in 0..(connections.get() - 1) { - let socket = send_migration_socket(destination)?; + for n in 0..(connections.get() - 1) { + let socket = (match send_migration_socket(destination) { + Err(e) if n == 0 => { + // If we encounter a problem on the first additional + // connection, we just assume the other side doesn't support + // multiple connections and carry on. + info!( + "Couldn't establish additional connections for sending VM memory: {e}, ignoring!" + ); + break; + } + otherwise => otherwise, + })?; let guest_mem = guest_mem.clone(); let (send, recv) = std::sync::mpsc::sync_channel::( Self::BUFFERED_REQUESTS_PER_THREAD, From d435a5408a10c7ca4ea0012b81745a23a690b626 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Wed, 5 Nov 2025 15:19:00 +0100 Subject: [PATCH 248/294] vmm: don't send memory chunked if we only have one connection This restores the old behavior/performance in case, we don't use multiple connections. On-behalf-of: SAP julian.stecklina@sap.com Signed-off-by: Julian Stecklina --- vmm/src/lib.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 9cfcc4c9db..2d7b63417c 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -1215,6 +1215,15 @@ impl SendAdditionalConnections { let thread_len = self.threads.len(); assert_eq!(thread_len, self.channels.len()); + // In case, we didn't manage to establish additional connections, don't + // bother sending memory in chunks. This would just lower throughput, + // because we wait for a response after each chunk instead of sending + // everything in one go. + if thread_len == 0 { + vm_send_memory(&self.guest_memory, socket, table)?; + return Ok(()); + } + // The chunk size is chosen to be big enough so that even very fast // links need some milliseconds to send it. 
'next_partition: for chunk in table.partition(Self::CHUNK_SIZE) { From 6048b8474c8c72a866385f86f5307edcc3e45e97 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 18 Nov 2025 07:20:58 +0100 Subject: [PATCH 249/294] tests: enable `cargo test --workspace` + `#[cfg(devcli_testenv)]` TL;DR: Massive quality of life improvement for devs Cloud Hypervisor uses the Cargo test framework for multiple tests: - normal unit tests - unit tests requiring special environment (the Tap device tests) - integration tests requiring a special environment This prevented the execution of `cargo test --workspace`, which results in a very poor developer experience. Although `./scripts/run_unit_tests.sh` exists, there are valid reasons why devs cannot or even don't want to use it. By adding a new `chv_testenv` rustc config, we can conditionally only activate tests when the `./scripts/` magic runs them. This improves the general developer experience by a lot. Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- Cargo.toml | 4 ++++ net_util/src/tap.rs | 1 + scripts/dev_cli.sh | 3 +++ scripts/run_integration_tests_aarch64.sh | 2 ++ scripts/run_integration_tests_live_migration.sh | 3 +++ scripts/run_integration_tests_rate_limiter.sh | 3 +++ scripts/run_integration_tests_vfio.sh | 3 +++ scripts/run_integration_tests_windows_aarch64.sh | 2 ++ scripts/run_integration_tests_windows_x86_64.sh | 2 ++ scripts/run_integration_tests_x86_64.sh | 7 +++---- tests/integration.rs | 1 + 11 files changed, 27 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d84f4f683d..ef46a541de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -168,3 +168,7 @@ thiserror = "2.0.12" uuid = { version = "1.18.1" } wait-timeout = "0.2.1" zerocopy = { version = "0.8.26", default-features = false } + +[workspace.lints.rust] +# `level = warn` is irrelevant here but mandatory for rustc/cargo +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(devcli_testenv)'] } diff --git a/net_util/src/tap.rs b/net_util/src/tap.rs index 93e1b96a0e..222ca115d0 100644 --- a/net_util/src/tap.rs +++ b/net_util/src/tap.rs @@ -563,6 +563,7 @@ impl AsRawFd for Tap { } #[cfg(test)] +#[cfg(devcli_testenv)] // we need special permissions in the ENV to create Tap devices mod tests { use std::net::Ipv4Addr; use std::sync::{LazyLock, Mutex, mpsc}; diff --git a/scripts/dev_cli.sh b/scripts/dev_cli.sh index 519517dcdb..2c1eab6013 100755 --- a/scripts/dev_cli.sh +++ b/scripts/dev_cli.sh @@ -47,6 +47,9 @@ CARGO_GIT_REGISTRY_DIR="${CLH_BUILD_DIR}/cargo_git_registry" # Full path to the cargo target dir on the host. CARGO_TARGET_DIR="${CLH_BUILD_DIR}/cargo_target" +# Let tests know that the special environment is set up. 
+RUSTFLAGS="${RUSTFLAGS} --cfg devcli_testenv" + # Send a decorated message to stdout, followed by a new line # say() { diff --git a/scripts/run_integration_tests_aarch64.sh b/scripts/run_integration_tests_aarch64.sh index 758c69c6b2..4afa523772 100755 --- a/scripts/run_integration_tests_aarch64.sh +++ b/scripts/run_integration_tests_aarch64.sh @@ -190,7 +190,9 @@ if [ $RES -ne 0 ]; then exit 1 fi +# Common configuration for every test run export RUST_BACKTRACE=1 +export RUSTFLAGS="$RUSTFLAGS" cargo build --all --release --target "$BUILD_TARGET" diff --git a/scripts/run_integration_tests_live_migration.sh b/scripts/run_integration_tests_live_migration.sh index fa0b3dcf45..f191b0baa1 100755 --- a/scripts/run_integration_tests_live_migration.sh +++ b/scripts/run_integration_tests_live_migration.sh @@ -83,7 +83,10 @@ PAGE_NUM=$((12288 * 1024 / HUGEPAGESIZE)) echo "$PAGE_NUM" | sudo tee /proc/sys/vm/nr_hugepages sudo chmod a+rwX /dev/hugepages +# Common configuration for every test run export RUST_BACKTRACE=1 +export RUSTFLAGS="$RUSTFLAGS" + time cargo test $test_features "live_migration_parallel::$test_filter" -- ${test_binary_args[*]} RES=$? diff --git a/scripts/run_integration_tests_rate_limiter.sh b/scripts/run_integration_tests_rate_limiter.sh index 56fb91e6e0..2e14aaffb2 100755 --- a/scripts/run_integration_tests_rate_limiter.sh +++ b/scripts/run_integration_tests_rate_limiter.sh @@ -55,7 +55,10 @@ fi cargo build --features mshv --all --release --target "$BUILD_TARGET" +# Common configuration for every test run export RUST_BACKTRACE=1 +export RUSTFLAGS="$RUSTFLAGS" + time cargo test $test_features "rate_limiter::$test_filter" -- --test-threads=1 ${test_binary_args[*]} RES=$? diff --git a/scripts/run_integration_tests_vfio.sh b/scripts/run_integration_tests_vfio.sh index 4d7bac60a4..b182c6612c 100755 --- a/scripts/run_integration_tests_vfio.sh +++ b/scripts/run_integration_tests_vfio.sh @@ -26,7 +26,10 @@ fi cargo build --features mshv --all --release --target "$BUILD_TARGET" +# Common configuration for every test run export RUST_BACKTRACE=1 +export RUSTFLAGS="$RUSTFLAGS" + time cargo test "vfio::test_nvidia" -- --test-threads=1 ${test_binary_args[*]} RES=$? 
diff --git a/scripts/run_integration_tests_windows_aarch64.sh b/scripts/run_integration_tests_windows_aarch64.sh index 92d66f805d..0190aa1c6c 100755 --- a/scripts/run_integration_tests_windows_aarch64.sh +++ b/scripts/run_integration_tests_windows_aarch64.sh @@ -36,7 +36,9 @@ dmsetup mknodes dmsetup create windows-snapshot-base --table "0 $img_blk_size snapshot-origin /dev/mapper/windows-base" dmsetup mknodes +# Common configuration for every test run export RUST_BACKTRACE=1 +export RUSTFLAGS="$RUSTFLAGS" cargo build --all --release --target "$BUILD_TARGET" diff --git a/scripts/run_integration_tests_windows_x86_64.sh b/scripts/run_integration_tests_windows_x86_64.sh index 2b11a6e687..714f6e4788 100755 --- a/scripts/run_integration_tests_windows_x86_64.sh +++ b/scripts/run_integration_tests_windows_x86_64.sh @@ -41,7 +41,9 @@ dmsetup mknodes cargo build --features mshv --all --release --target "$BUILD_TARGET" +# Common configuration for every test run export RUST_BACKTRACE=1 +export RUSTFLAGS="$RUSTFLAGS" # Only run with 1 thread to avoid tests interfering with one another because # Windows has a static IP configured diff --git a/scripts/run_integration_tests_x86_64.sh b/scripts/run_integration_tests_x86_64.sh index 3f28e23cdf..662e274a93 100755 --- a/scripts/run_integration_tests_x86_64.sh +++ b/scripts/run_integration_tests_x86_64.sh @@ -177,14 +177,16 @@ ulimit -l unlimited # Set number of open descriptors high enough for VFIO tests to run ulimit -n 4096 +# Common configuration for every test run export RUST_BACKTRACE=1 +export RUSTFLAGS="$RUSTFLAGS" + time cargo test --release --target "$BUILD_TARGET" $test_features "common_parallel::$test_filter" -- ${test_binary_args[*]} RES=$? # Run some tests in sequence since the result could be affected by other tests # running in parallel. if [ $RES -eq 0 ]; then - export RUST_BACKTRACE=1 time cargo test --release --target "$BUILD_TARGET" $test_features "common_sequential::$test_filter" -- --test-threads=1 ${test_binary_args[*]} RES=$? fi @@ -192,7 +194,6 @@ fi # Run tests on dbus_api if [ $RES -eq 0 ]; then cargo build --features "mshv,dbus_api" --all --release --target "$BUILD_TARGET" - export RUST_BACKTRACE=1 # integration tests now do not reply on build feature "dbus_api" time cargo test $test_features "dbus_api::$test_filter" -- ${test_binary_args[*]} RES=$? @@ -201,14 +202,12 @@ fi # Run tests on fw_cfg if [ $RES -eq 0 ]; then cargo build --features "mshv,fw_cfg" --all --release --target "$BUILD_TARGET" - export RUST_BACKTRACE=1 time cargo test "fw_cfg::$test_filter" --target "$BUILD_TARGET" -- ${test_binary_args[*]} RES=$? fi if [ $RES -eq 0 ]; then cargo build --features ivshmem --all --release --target "$BUILD_TARGET" - export RUST_BACKTRACE=1 time cargo test $test_features "ivshmem::$test_filter" --target "$BUILD_TARGET" -- ${test_binary_args[*]} RES=$? fi diff --git a/tests/integration.rs b/tests/integration.rs index 5ad03d0b2c..d3539fe877 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -2,6 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 // +#![cfg(devcli_testenv)] #![allow(clippy::undocumented_unsafe_blocks)] // When enabling the `mshv` feature, we skip quite some tests and // hence have known dead-code. 
This annotation silences dead-code From 368bdaa5d0fb57715212c7df3136fba8dcab5d4b Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 18 Nov 2025 07:33:24 +0100 Subject: [PATCH 250/294] tests: prevent broken terminal after running `cargo test -p vmm` Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- vmm/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 2d7b63417c..3b8cc9a620 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -3236,7 +3236,8 @@ mod unit_tests { }, console: ConsoleConfig { file: None, - mode: ConsoleOutputMode::Tty, + // Caution: Don't use `Tty` to not mess with users terminal + mode: ConsoleOutputMode::Off, iommu: false, socket: None, url: None, From d23af4f589b77d0babf95f4d922a94cb0e9bbe19 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Mon, 17 Nov 2025 15:42:20 +0100 Subject: [PATCH 251/294] misc: improve developer experience of cargo clippy A major improvement to the developer experience of clippy in Cloud Hypervisor. 1. Make `cargo clippy` just work with the same lints we use in CI 2. Simplify adding new lints Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- .github/workflows/build.yaml | 24 +++++++++---------- .../workflows/preview-riscv64-modules.yaml | 0 .github/workflows/quality.yaml | 24 +++++++++---------- Cargo.toml | 9 +++++++ api_client/Cargo.toml | 3 +++ arch/Cargo.toml | 3 +++ block/Cargo.toml | 3 +++ devices/Cargo.toml | 3 +++ event_monitor/Cargo.toml | 3 +++ hypervisor/Cargo.toml | 3 +++ net_gen/Cargo.toml | 3 +++ net_util/Cargo.toml | 3 +++ option_parser/Cargo.toml | 3 +++ pci/Cargo.toml | 3 +++ performance-metrics/Cargo.toml | 3 +++ rate_limiter/Cargo.toml | 3 +++ serial_buffer/Cargo.toml | 3 +++ test_infra/Cargo.toml | 3 +++ tpm/Cargo.toml | 3 +++ tracer/Cargo.toml | 3 +++ vhost_user_block/Cargo.toml | 3 +++ vhost_user_net/Cargo.toml | 3 +++ virtio-devices/Cargo.toml | 3 +++ vm-allocator/Cargo.toml | 3 +++ vm-device/Cargo.toml | 3 +++ vm-migration/Cargo.toml | 3 +++ vm-virtio/Cargo.toml | 3 +++ vmm/Cargo.toml | 3 +++ 28 files changed, 105 insertions(+), 24 deletions(-) create mode 100644 .github/workflows/preview-riscv64-modules.yaml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 5984430b89..3778f6b9d8 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -30,40 +30,40 @@ jobs: target: ${{ matrix.target }} - name: Build (default features) - run: cargo rustc --locked --bin cloud-hypervisor -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + run: cargo rustc --locked --bin cloud-hypervisor -- -D warnings - name: Build (kvm) - run: cargo rustc --locked --bin cloud-hypervisor --no-default-features --features "kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + run: cargo rustc --locked --bin cloud-hypervisor --no-default-features --features "kvm" -- -D warnings - name: Build (default features + tdx) - run: cargo rustc --locked --bin cloud-hypervisor --features "tdx" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + run: cargo rustc --locked --bin cloud-hypervisor --features "tdx" -- -D warnings - name: Build (default features + dbus_api) - run: cargo rustc --locked --bin cloud-hypervisor --features "dbus_api" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + run: cargo rustc --locked --bin 
cloud-hypervisor --features "dbus_api" -- -D warnings - name: Build (default features + guest_debug) - run: cargo rustc --locked --bin cloud-hypervisor --features "guest_debug" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + run: cargo rustc --locked --bin cloud-hypervisor --features "guest_debug" -- -D warnings - name: Build (default features + pvmemcontrol) - run: cargo rustc --locked --bin cloud-hypervisor --features "pvmemcontrol" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + run: cargo rustc --locked --bin cloud-hypervisor --features "pvmemcontrol" -- -D warnings - name: Build (default features + fw_cfg) - run: cargo rustc --locked --bin cloud-hypervisor --features "fw_cfg" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + run: cargo rustc --locked --bin cloud-hypervisor --features "fw_cfg" -- -D warnings - name: Build (default features + ivshmem) - run: cargo rustc --locked --bin cloud-hypervisor --features "ivshmem" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + run: cargo rustc --locked --bin cloud-hypervisor --features "ivshmem" -- -D warnings - name: Build (mshv) - run: cargo rustc --locked --bin cloud-hypervisor --no-default-features --features "mshv" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + run: cargo rustc --locked --bin cloud-hypervisor --no-default-features --features "mshv" -- -D warnings - name: Build (sev_snp) - run: cargo rustc --locked --bin cloud-hypervisor --no-default-features --features "sev_snp" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + run: cargo rustc --locked --bin cloud-hypervisor --no-default-features --features "sev_snp" -- -D warnings - name: Build (igvm) - run: cargo rustc --locked --bin cloud-hypervisor --no-default-features --features "igvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + run: cargo rustc --locked --bin cloud-hypervisor --no-default-features --features "igvm" -- -D warnings - name: Build (mshv + kvm) - run: cargo rustc --locked --bin cloud-hypervisor --no-default-features --features "mshv,kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + run: cargo rustc --locked --bin cloud-hypervisor --no-default-features --features "mshv,kvm" -- -D warnings - name: Release Build (default features) run: cargo build --locked --all --release --target=${{ matrix.target }} diff --git a/.github/workflows/preview-riscv64-modules.yaml b/.github/workflows/preview-riscv64-modules.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 57b33b9976..f7b5206e6e 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -50,7 +50,7 @@ jobs: cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} - args: --locked --all --all-targets --no-default-features --tests --examples --features "kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + args: --locked --all --all-targets --no-default-features --tests --examples --features "kvm" -- -D warnings - name: Clippy (mshv) uses: houseabsolute/actions-rust-cross@v1 @@ -59,7 +59,7 @@ jobs: cross-version: 
3e0957637b49b1bbced23ad909170650c5b70635 toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} - args: --locked --all --all-targets --no-default-features --tests --examples --features "mshv" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + args: --locked --all --all-targets --no-default-features --tests --examples --features "mshv" -- -D warnings - name: Clippy (mshv + kvm) uses: houseabsolute/actions-rust-cross@v1 @@ -68,7 +68,7 @@ jobs: cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} - args: --locked --all --all-targets --no-default-features --tests --examples --features "mshv,kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + args: --locked --all --all-targets --no-default-features --tests --examples --features "mshv,kvm" -- -D warnings - name: Clippy (default features) uses: houseabsolute/actions-rust-cross@v1 @@ -77,7 +77,7 @@ jobs: cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} - args: --locked --all --all-targets --tests --examples -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + args: --locked --all --all-targets --tests --examples -- -D warnings - name: Clippy (default features + guest_debug) uses: houseabsolute/actions-rust-cross@v1 @@ -86,7 +86,7 @@ jobs: cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} - args: --locked --all --all-targets --tests --examples --features "guest_debug" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + args: --locked --all --all-targets --tests --examples --features "guest_debug" -- -D warnings - name: Clippy (default features + pvmemcontrol) uses: houseabsolute/actions-rust-cross@v1 @@ -95,7 +95,7 @@ jobs: cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} - args: --locked --all --all-targets --tests --examples --features "pvmemcontrol" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + args: --locked --all --all-targets --tests --examples --features "pvmemcontrol" -- -D warnings - name: Clippy (default features + tracing) uses: houseabsolute/actions-rust-cross@v1 @@ -104,13 +104,13 @@ jobs: cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} - args: --locked --all --all-targets --tests --examples --features "tracing" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + args: --locked --all --all-targets --tests --examples --features "tracing" -- -D warnings - name: Clippy (default features + fw_cfg) uses: actions-rs/cargo@v1 with: use-cross: ${{ matrix.target != 'x86_64-unknown-linux-gnu' }} command: clippy - args: --target=${{ matrix.target }} --locked --all --all-targets --tests --examples --features "fw_cfg" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + args: --target=${{ matrix.target }} --locked --all --all-targets --tests --examples --features "fw_cfg" -- -D warnings - name: Clippy (default features + ivshmem) uses: houseabsolute/actions-rust-cross@v1 @@ -119,7 +119,7 @@ jobs: cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} - args: --locked 
--all --all-targets --tests --examples --features "ivshmem" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + args: --locked --all --all-targets --tests --examples --features "ivshmem" -- -D warnings - name: Clippy (sev_snp) if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} @@ -129,7 +129,7 @@ jobs: cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} - args: --locked --all --all-targets --no-default-features --tests --examples --features "sev_snp" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + args: --locked --all --all-targets --no-default-features --tests --examples --features "sev_snp" -- -D warnings - name: Clippy (igvm) if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} @@ -139,7 +139,7 @@ jobs: cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} - args: --locked --all --all-targets --no-default-features --tests --examples --features "igvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + args: --locked --all --all-targets --no-default-features --tests --examples --features "igvm" -- -D warnings - name: Clippy (kvm + tdx) if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} @@ -149,7 +149,7 @@ jobs: cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} - args: --locked --all --all-targets --no-default-features --tests --examples --features "tdx,kvm" -- -D warnings -D clippy::undocumented_unsafe_blocks -W clippy::assertions_on_result_states + args: --locked --all --all-targets --no-default-features --tests --examples --features "tdx,kvm" -- -D warnings - name: Check build did not modify any files run: test -z "$(git status --porcelain)" diff --git a/Cargo.toml b/Cargo.toml index ef46a541de..ea6c7dac3b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -90,6 +90,9 @@ sev_snp = ["igvm", "mshv", "vmm/sev_snp"] tdx = ["vmm/tdx"] tracing = ["tracer/tracing", "vmm/tracing"] +[lints] +workspace = true + [workspace] members = [ "api_client", @@ -172,3 +175,9 @@ zerocopy = { version = "0.8.26", default-features = false } [workspace.lints.rust] # `level = warn` is irrelevant here but mandatory for rustc/cargo unexpected_cfgs = { level = "warn", check-cfg = ['cfg(devcli_testenv)'] } + +[workspace.lints.clippy] +# Any clippy lint in alphabetical order, including lint groups: +# https://rust-lang.github.io/rust-clippy/master/index.html +assertions_on_result_states = "deny" +undocumented_unsafe_blocks = "deny" diff --git a/api_client/Cargo.toml b/api_client/Cargo.toml index 429ecbf927..b8791dfc3d 100644 --- a/api_client/Cargo.toml +++ b/api_client/Cargo.toml @@ -7,3 +7,6 @@ version = "0.1.0" [dependencies] thiserror = { workspace = true } vmm-sys-util = { workspace = true } + +[lints] +workspace = true diff --git a/arch/Cargo.toml b/arch/Cargo.toml index 3bd32affb1..804be793d0 100644 --- a/arch/Cargo.toml +++ b/arch/Cargo.toml @@ -27,3 +27,6 @@ vmm-sys-util = { workspace = true, features = ["with-serde"] } [target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] fdt_parser = { version = "0.1.5", package = "fdt" } vm-fdt = { workspace = true } + +[lints] +workspace = true diff --git a/block/Cargo.toml b/block/Cargo.toml index db4ac9a6b3..9823c1f818 100644 --- a/block/Cargo.toml +++ b/block/Cargo.toml @@ -28,3 +28,6 @@ vm-memory = { workspace = true, 
features = [ ] } vm-virtio = { path = "../vm-virtio" } vmm-sys-util = { workspace = true } + +[lints] +workspace = true diff --git a/devices/Cargo.toml b/devices/Cargo.toml index bf620eca90..08aa7fd60d 100644 --- a/devices/Cargo.toml +++ b/devices/Cargo.toml @@ -48,3 +48,6 @@ fw_cfg = ["arch/fw_cfg", "bitfield-struct", "linux-loader", "zerocopy"] ivshmem = [] kvm = ["arch/kvm"] pvmemcontrol = [] + +[lints] +workspace = true diff --git a/event_monitor/Cargo.toml b/event_monitor/Cargo.toml index b2b7a4e48d..41d3102807 100644 --- a/event_monitor/Cargo.toml +++ b/event_monitor/Cargo.toml @@ -9,3 +9,6 @@ flume = { workspace = true } libc = { workspace = true } serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true } + +[lints] +workspace = true diff --git a/hypervisor/Cargo.toml b/hypervisor/Cargo.toml index c756cf9fc1..73b2cd612a 100644 --- a/hypervisor/Cargo.toml +++ b/hypervisor/Cargo.toml @@ -63,3 +63,6 @@ version = "1.21.0" [dev-dependencies] env_logger = { workspace = true } + +[lints] +workspace = true diff --git a/net_gen/Cargo.toml b/net_gen/Cargo.toml index b1443c1f29..a99c7c995d 100644 --- a/net_gen/Cargo.toml +++ b/net_gen/Cargo.toml @@ -7,3 +7,6 @@ version = "0.1.0" [dependencies] vmm-sys-util = { workspace = true } + +[lints] +workspace = true diff --git a/net_util/Cargo.toml b/net_util/Cargo.toml index 8f5df72225..a55db49f8a 100644 --- a/net_util/Cargo.toml +++ b/net_util/Cargo.toml @@ -27,3 +27,6 @@ vmm-sys-util = { workspace = true } pnet = "0.35.0" pnet_datalink = "0.35.0" serde_json = { workspace = true } + +[lints] +workspace = true diff --git a/option_parser/Cargo.toml b/option_parser/Cargo.toml index abacf51ddd..3d76690b41 100644 --- a/option_parser/Cargo.toml +++ b/option_parser/Cargo.toml @@ -6,3 +6,6 @@ version = "0.1.0" [dependencies] thiserror = { workspace = true } + +[lints] +workspace = true diff --git a/pci/Cargo.toml b/pci/Cargo.toml index e1d631c348..760baae03d 100644 --- a/pci/Cargo.toml +++ b/pci/Cargo.toml @@ -29,3 +29,6 @@ vm-memory = { workspace = true, features = [ ] } vm-migration = { path = "../vm-migration" } vmm-sys-util = { workspace = true } + +[lints] +workspace = true diff --git a/performance-metrics/Cargo.toml b/performance-metrics/Cargo.toml index 00e6e74682..531c6abe99 100644 --- a/performance-metrics/Cargo.toml +++ b/performance-metrics/Cargo.toml @@ -12,3 +12,6 @@ serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true } test_infra = { path = "../test_infra" } thiserror = { workspace = true } + +[lints] +workspace = true diff --git a/rate_limiter/Cargo.toml b/rate_limiter/Cargo.toml index 3067c695bb..206ec7b7f8 100644 --- a/rate_limiter/Cargo.toml +++ b/rate_limiter/Cargo.toml @@ -9,3 +9,6 @@ libc = { workspace = true } log = { workspace = true } thiserror = { workspace = true } vmm-sys-util = { workspace = true } + +[lints] +workspace = true diff --git a/serial_buffer/Cargo.toml b/serial_buffer/Cargo.toml index 0691b8a3b7..767c8a97ff 100644 --- a/serial_buffer/Cargo.toml +++ b/serial_buffer/Cargo.toml @@ -3,3 +3,6 @@ authors = ["The Cloud Hypervisor Authors"] edition.workspace = true name = "serial_buffer" version = "0.1.0" + +[lints] +workspace = true diff --git a/test_infra/Cargo.toml b/test_infra/Cargo.toml index 8fdefed15a..e6ea592c39 100644 --- a/test_infra/Cargo.toml +++ b/test_infra/Cargo.toml @@ -13,3 +13,6 @@ ssh2 = { version = "0.9.5", features = ["vendored-openssl"] } thiserror = { workspace = true } vmm-sys-util = { workspace = true } wait-timeout = { workspace = 
true } + +[lints] +workspace = true diff --git a/tpm/Cargo.toml b/tpm/Cargo.toml index cf03968cde..82dc8f79be 100644 --- a/tpm/Cargo.toml +++ b/tpm/Cargo.toml @@ -12,3 +12,6 @@ log = { workspace = true } net_gen = { path = "../net_gen" } thiserror = { workspace = true } vmm-sys-util = { workspace = true } + +[lints] +workspace = true diff --git a/tracer/Cargo.toml b/tracer/Cargo.toml index bdcf559695..1ac9f4e393 100644 --- a/tracer/Cargo.toml +++ b/tracer/Cargo.toml @@ -12,3 +12,6 @@ serde_json = { workspace = true } [features] tracing = [] + +[lints] +workspace = true diff --git a/vhost_user_block/Cargo.toml b/vhost_user_block/Cargo.toml index c2e7385668..e674c96eac 100644 --- a/vhost_user_block/Cargo.toml +++ b/vhost_user_block/Cargo.toml @@ -19,3 +19,6 @@ virtio-bindings = { workspace = true } virtio-queue = { workspace = true } vm-memory = { workspace = true } vmm-sys-util = { workspace = true } + +[lints] +workspace = true diff --git a/vhost_user_net/Cargo.toml b/vhost_user_net/Cargo.toml index 6cd316e9fe..849ad5426e 100644 --- a/vhost_user_net/Cargo.toml +++ b/vhost_user_net/Cargo.toml @@ -19,3 +19,6 @@ vhost-user-backend = { workspace = true } virtio-bindings = { workspace = true } vm-memory = { workspace = true } vmm-sys-util = { workspace = true } + +[lints] +workspace = true diff --git a/virtio-devices/Cargo.toml b/virtio-devices/Cargo.toml index 64a60910ca..5cbfe145f4 100644 --- a/virtio-devices/Cargo.toml +++ b/virtio-devices/Cargo.toml @@ -47,3 +47,6 @@ vm-memory = { workspace = true, features = [ vm-migration = { path = "../vm-migration" } vm-virtio = { path = "../vm-virtio" } vmm-sys-util = { workspace = true } + +[lints] +workspace = true diff --git a/vm-allocator/Cargo.toml b/vm-allocator/Cargo.toml index e77e877917..a4996d6dc3 100644 --- a/vm-allocator/Cargo.toml +++ b/vm-allocator/Cargo.toml @@ -14,3 +14,6 @@ vm-memory = { workspace = true } [target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] arch = { path = "../arch" } + +[lints] +workspace = true diff --git a/vm-device/Cargo.toml b/vm-device/Cargo.toml index 80ed1489a3..a57ea57f5b 100644 --- a/vm-device/Cargo.toml +++ b/vm-device/Cargo.toml @@ -16,3 +16,6 @@ thiserror = { workspace = true } vfio-ioctls = { workspace = true, default-features = false } vm-memory = { workspace = true, features = ["backend-mmap"] } vmm-sys-util = { workspace = true } + +[lints] +workspace = true diff --git a/vm-migration/Cargo.toml b/vm-migration/Cargo.toml index 69d57076f5..b17475065c 100644 --- a/vm-migration/Cargo.toml +++ b/vm-migration/Cargo.toml @@ -11,3 +11,6 @@ serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true } thiserror = { workspace = true } vm-memory = { workspace = true, features = ["backend-atomic", "backend-mmap"] } + +[lints] +workspace = true diff --git a/vm-virtio/Cargo.toml b/vm-virtio/Cargo.toml index 5f195af492..228f552416 100644 --- a/vm-virtio/Cargo.toml +++ b/vm-virtio/Cargo.toml @@ -14,3 +14,6 @@ vm-memory = { workspace = true, features = [ "backend-bitmap", "backend-mmap", ] } + +[lints] +workspace = true diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index f6d5e9cb8b..0608aef7fb 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -96,3 +96,6 @@ vm-virtio = { path = "../vm-virtio" } vmm-sys-util = { workspace = true, features = ["with-serde"] } zbus = { version = "5.7.1", optional = true } zerocopy = { workspace = true, features = ["alloc", "derive"] } + +[lints] +workspace = true From 89c1f71705ef49efb1a12be3d03e12827cec8ecd Mon Sep 17 00:00:00 
2001 From: Philipp Schuster Date: Tue, 18 Nov 2025 09:24:53 +0100 Subject: [PATCH 252/294] misc: clippy: add default clippy lint groups This is the first commit in a series of commits to improve the Code Quality in Cloud Hypervisor in a sustainable way. These are the default rules from `clippy::all` but written here to be more explicit. `clippy::all` refers to all "default sensible" lints, not all existing lints. Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- Cargo.toml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ea6c7dac3b..c948bca3f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -177,7 +177,17 @@ zerocopy = { version = "0.8.26", default-features = false } unexpected_cfgs = { level = "warn", check-cfg = ['cfg(devcli_testenv)'] } [workspace.lints.clippy] -# Any clippy lint in alphabetical order, including lint groups: +# Any clippy lint (group) in alphabetical order: # https://rust-lang.github.io/rust-clippy/master/index.html + +# Groups +all = "deny" # shorthand for the other groups but here for completeness +complexity = "deny" +correctness = "deny" +perf = "deny" +style = "deny" +suspicious = "deny" + +# Individual Lints assertions_on_result_states = "deny" undocumented_unsafe_blocks = "deny" From f73f63cbb3a1c550aff8bb2970575fc4c346503a Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 21 Nov 2025 10:48:15 +0100 Subject: [PATCH 253/294] vmm: migration: prepare EventFd for async migration events This is a prerequisite for the following commit which puts the migration into a dedicated thread. It allows the VMM to react to migration events (success/failure). The commit series was inspired by @ljcore [0] but was changed quite significantly. [0] https://github.com/cloud-hypervisor/cloud-hypervisor/pull/7038 Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- vmm/src/lib.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 3b8cc9a620..1e467b9e31 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -243,6 +243,7 @@ pub enum EpollDispatch { Api = 2, ActivateVirtioDevices = 3, Debug = 4, + CheckMigration = 5, Unknown, } @@ -255,6 +256,7 @@ impl From for EpollDispatch { 2 => Api, 3 => ActivateVirtioDevices, 4 => Debug, + 5 => CheckMigration, _ => Unknown, } } @@ -739,6 +741,7 @@ pub struct Vmm { original_termios_opt: Arc>>, console_resize_pipe: Option>, console_info: Option, + check_migration_evt: EventFd, } /// Wait for a file descriptor to become readable. In this case, we return @@ -1467,6 +1470,7 @@ impl Vmm { let mut epoll = EpollContext::new().map_err(Error::Epoll)?; let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; + let check_migration_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; epoll .add_event(&exit_evt, EpollDispatch::Exit) @@ -1489,6 +1493,10 @@ .add_event(&debug_evt, EpollDispatch::Debug) .map_err(Error::Epoll)?; + epoll + .add_event(&check_migration_evt, EpollDispatch::CheckMigration) + .map_err(Error::Epoll)?; + Ok(Vmm { epoll, exit_evt, @@ -1509,6 +1517,7 @@ original_termios_opt: Arc::new(Mutex::new(None)), console_resize_pipe: None, console_info: None, + check_migration_evt, }) } @@ -2255,6 +2264,14 @@ impl Vmm { } } + /// Checks the migration result.
+ /// + /// This should be called when the migration thread indicated a state + /// change (and therefore, its termination). The function checks the result + /// of that thread and either shuts down the VMM on success or keeps the VM + /// and the VMM running on migration failure. + fn check_migration_result(&mut self) {} + fn control_loop( &mut self, api_receiver: Rc>, @@ -2348,6 +2365,14 @@ impl Vmm { } #[cfg(not(feature = "guest_debug"))] EpollDispatch::Debug => {} + EpollDispatch::CheckMigration => { + info!("VM migration check event"); + // Consume the event. + self.check_migration_evt + .read() + .map_err(Error::EventFdRead)?; + self.check_migration_result(); + } } } } From c699a56289ec83369726dc86d7c36db264f528f1 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 21 Nov 2025 10:49:46 +0100 Subject: [PATCH 254/294] vmm: migration: handle in dedicated thread (make async) This puts the send-migration action into a dedicated thread. This means: 1. The send-migration call will exit sooner (just trigger the migration) 2. Other API Call will not be possible as the VM's ownership is transferred from the VMM to the migration thread. E.g., hotplugging won't work (which is good). 3. If the migration causes the VMM process to crash, this currently can't be observed. A mechanism to query the migration status doesn't exist. Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- vmm/src/lib.rs | 183 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 136 insertions(+), 47 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 1e467b9e31..bc3e606502 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -30,6 +30,7 @@ use std::path::PathBuf; use std::rc::Rc; use std::sync::mpsc::{Receiver, RecvError, SendError, Sender, TrySendError}; use std::sync::{Arc, Barrier, Mutex}; +use std::thread::JoinHandle; #[cfg(not(target_arch = "riscv64"))] use std::time::{Duration, Instant}; use std::{io, result, thread}; @@ -714,6 +715,62 @@ impl MigrationState { } } +/// Abstraction for the thread controlling and performing the live migration. +/// +/// The migration thread also takes ownership of the [`Vm`] from the [`Vmm`]. +struct MigrationWorker { + vm: Vm, + check_migration_evt: EventFd, + config: VmSendMigrationData, + #[cfg(all(feature = "kvm", target_arch = "x86_64"))] + hypervisor: Arc, +} + +impl MigrationWorker { + /// Performs any final cleanup after failed live migrations. + /// + /// Helper for [`Self::migrate`]. + fn migrate_error_cleanup(&mut self) -> result::Result<(), MigratableError> { + // Stop logging dirty pages only for non-local migrations + if !self.config.local { + self.vm.stop_dirty_log()?; + } + + Ok(()) + } + + /// Migrate and cleanup. + fn migrate(&mut self) -> result::Result<(), MigratableError> { + debug!("start sending migration"); + Vmm::send_migration( + &mut self.vm, + #[cfg(all(feature = "kvm", target_arch = "x86_64"))] + self.hypervisor.clone(), + self.config.clone(), + ).inspect_err(|_| { + let e = self.migrate_error_cleanup(); + if let Err(e) = e { + error!("Failed to clean up after a failed live migration. VM might keep running but in an odd or possibly slowed-down state: {e}"); + } + })?; + + Ok(()) + } + + /// Perform the migration and communicate with the [`Vmm`] thread. 
+ fn run(mut self) -> (Vm, result::Result<(), MigratableError>) { + debug!("migration thread is starting"); + + let res = self.migrate().inspect_err(|e| error!("migrate error: {e}")); + + // Notify VMM thread to get migration result by joining this thread. + self.check_migration_evt.write(1).unwrap(); + + debug!("migration thread is finished"); + (self.vm, res) + } +} + pub struct VmmThreadHandle { pub thread_handle: thread::JoinHandle>, #[cfg(feature = "dbus_api")] @@ -731,6 +788,11 @@ pub struct Vmm { #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, version: VmmVersionInfo, + /// The currently running [`Vm`] instance, if any. + /// + /// This is `Some` from the boot to the shutdown of a VM. In the special + /// case of an ongoing live-migration, this is temporarily `None` and held + /// by a guard to prevent modifications to the VM. vm: Option, vm_config: Option>>, seccomp_action: SeccompAction, @@ -742,6 +804,10 @@ pub struct Vmm { console_resize_pipe: Option>, console_info: Option, check_migration_evt: EventFd, + /// Handle to the [`MigrationWorker`] thread. + /// + /// The handle will return the [`Vm`] back in any case. Further, the underlying error (if any) is returned. + migration_thread_handle: Option)>>, } /// Wait for a file descriptor to become readable. In this case, we return @@ -1418,14 +1484,14 @@ impl Vmm { .name("vmm_signal_handler".to_string()) .spawn(move || { if !signal_handler_seccomp_filter.is_empty() && let Err(e) = apply_filter(&signal_handler_seccomp_filter) - .map_err(Error::ApplySeccompFilter) - { - error!("Error applying seccomp filter: {:?}", e); - exit_evt.write(1).ok(); - return; - } - - if landlock_enable{ + .map_err(Error::ApplySeccompFilter) + { + error!("Error applying seccomp filter: {:?}", e); + exit_evt.write(1).ok(); + return; + } + + if landlock_enable { match Landlock::new() { Ok(landlock) => { let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| { @@ -1443,11 +1509,11 @@ impl Vmm { std::panic::catch_unwind(AssertUnwindSafe(|| { Vmm::signal_handler(signals, original_termios_opt, &exit_evt); })) - .map_err(|_| { - error!("vmm signal_handler thread panicked"); - exit_evt.write(1).ok() - }) - .ok(); + .map_err(|_| { + error!("vmm signal_handler thread panicked"); + exit_evt.write(1).ok() + }) + .ok(); }) .map_err(Error::SignalHandlerSpawn)?, ); @@ -1518,6 +1584,7 @@ impl Vmm { console_resize_pipe: None, console_info: None, check_migration_evt, + migration_thread_handle: None, }) } @@ -2016,6 +2083,11 @@ impl Vmm { Ok(()) } + /// Performs a live-migration. + /// + /// This function performs necessary after-migration cleanup only in the + /// good case. Callers are responsible for properly handling failed + /// migrations. fn send_migration( vm: &mut Vm, #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc< @@ -2270,7 +2342,31 @@ impl Vmm { /// change (and therefore, its termination). The function checks the result /// of that thread and either shuts down the VMM on success or keeps the VM /// and the VMM running on migration failure. - fn check_migration_result(&mut self) {} + fn check_migration_result(&mut self) { + // At this point, the thread must be finished. + // If we fail here, we have lost anyway. Just panic. + let (vm, migration_res) = self + .migration_thread_handle + .take() + .expect("should have thread") + .join() + .expect("should have joined"); + + // Give VMM back control. 
+ self.vm = Some(vm); + + match migration_res { + Ok(()) => { + // Shutdown the VM after the migration succeeded + if let Err(e) = self.exit_evt.write(1) { + error!("Failed shutting down the VM after migration: {}", e); + } + } + Err(e) => { + error!("Migration failed: {}", e); + } + } + } fn control_loop( &mut self, @@ -3124,6 +3220,9 @@ impl RequestHandler for Vmm { send_data_migration.destination_url, send_data_migration.local ); + // TODO Check if there is already a migration in progress + // will be done in next commit + if !self .vm_config .as_ref() @@ -3138,42 +3237,32 @@ impl RequestHandler for Vmm { ))); } - if let Some(vm) = self.vm.as_mut() { - Self::send_migration( - vm, - #[cfg(all(feature = "kvm", target_arch = "x86_64"))] - self.hypervisor.clone(), - send_data_migration.clone(), - ) - .map_err(|migration_err| { - error!("Migration failed: {:?}", migration_err); - - // Stop logging dirty pages only for non-local migrations - if !send_data_migration.local - && let Err(e) = vm.stop_dirty_log() - { - return e; - } + // Take VM ownership. This also means that API events can no longer + // change the VM (e.g. net device hotplug). + let vm = self + .vm + .take() + .ok_or(MigratableError::MigrateSend(anyhow!("VM is not running")))?; - if vm.get_state().unwrap() == VmState::Paused - && let Err(e) = vm.resume() - { - return e; - } + // Start migration thread + let worker = MigrationWorker { + vm, + check_migration_evt: self.check_migration_evt.try_clone().unwrap(), + config: send_data_migration, + #[cfg(all(feature = "kvm", target_arch = "x86_64"))] + hypervisor: self.hypervisor.clone(), + }; - migration_err - })?; + self.migration_thread_handle = Some( + thread::Builder::new() + .name("migration".into()) + .spawn(move || worker.run()) + // For upstreaming, we should simply continue and return an + // error when this fails. For our PoC, this is fine. + .unwrap(), + ); - // Shutdown the VM after the migration succeeded - self.exit_evt.write(1).map_err(|e| { - MigratableError::MigrateSend(anyhow!( - "Failed shutting down the VM after migration: {:?}", - e - )) - }) - } else { - Err(MigratableError::MigrateSend(anyhow!("VM is not running"))) - } + Ok(()) } } From bcf56d2aa6eacfb744aa1ac097cb03539f4d5403 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Thu, 30 Oct 2025 12:10:51 +0100 Subject: [PATCH 255/294] vmm: better VM ownership handling in context of live migration The commit prepares to properly handle API events during ongoing live-migrations. The VmInfo call is currently not working when a VM is migrating. This will be addressed in a follow-up as part of migration statistics about ongoing live-migrations. Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- vmm/src/lib.rs | 769 ++++++++++++++++++++++++++++--------------------- vmm/src/vm.rs | 3 + 2 files changed, 445 insertions(+), 327 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index bc3e606502..3eb0648ba2 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -33,7 +33,7 @@ use std::sync::{Arc, Barrier, Mutex}; use std::thread::JoinHandle; #[cfg(not(target_arch = "riscv64"))] use std::time::{Duration, Instant}; -use std::{io, result, thread}; +use std::{io, mem, result, thread}; use anyhow::anyhow; #[cfg(feature = "dbus_api")] @@ -778,6 +778,41 @@ pub struct VmmThreadHandle { pub http_api_handle: Option, } +/// Describes the current ownership of a running VM. +#[allow(clippy::large_enum_variant)] +pub enum MaybeVmOwnership { + /// The VMM holds the ownership of the VM.
+ Vmm(Vm), + /// The VM is temporarily blocked by the current ongoing migration. + Migration, + /// No VM is running. + None, +} + +impl MaybeVmOwnership { + /// Takes the VM and replaces it with [`Self::Migration`]. + /// + /// # Panics + /// This method panics if `self` is not [`Self::Vmm`]. + fn take_vm_for_migration(&mut self) -> Vm { + if !matches!(self, Self::Vmm(_)) { + panic!("should only be called when a migration can start"); + } + + match mem::replace(self, Self::Migration) { + MaybeVmOwnership::Vmm(vm) => vm, + _ => unreachable!(), + } + } + + fn vm_mut(&mut self) -> Option<&mut Vm> { + match self { + MaybeVmOwnership::Vmm(vm) => Some(vm), + _ => None, + } + } +} + pub struct Vmm { epoll: EpollContext, exit_evt: EventFd, @@ -788,12 +823,7 @@ pub struct Vmm { #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, version: VmmVersionInfo, - /// The currently running [`Vm`] instance, if any. - /// - /// This is `Some` from the boot to the shutdown of a VM. In the special - /// case of an ongoing live-migration, this is temporarily `None` and held - /// by a guard to prevent modifications to the VM. - vm: Option, + vm: MaybeVmOwnership, vm_config: Option>>, seccomp_action: SeccompAction, hypervisor: Arc, @@ -1573,7 +1603,7 @@ impl Vmm { #[cfg(feature = "guest_debug")] vm_debug_evt, version: vmm_version, - vm: None, + vm: MaybeVmOwnership::None, vm_config: None, seccomp_action, hypervisor, @@ -1727,7 +1757,7 @@ impl Vmm { Command::Complete => { // The unwrap is safe, because the state machine makes sure we called // vm_receive_state before, which creates the VM. - let vm = self.vm.as_mut().unwrap(); + let vm = self.vm.vm_mut().unwrap(); vm.resume()?; Ok(Completed) } @@ -1898,7 +1928,7 @@ impl Vmm { vm.restore().map_err(|e| { MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e)) })?; - self.vm = Some(vm); + self.vm = MaybeVmOwnership::Vmm(vm); Ok(()) } @@ -2272,6 +2302,10 @@ impl Vmm { vm_config: Arc>, prefault: bool, ) -> std::result::Result<(), VmError> { + if matches!(self.vm, MaybeVmOwnership::Migration) { + return Err(VmError::VmMigrating); + } + let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?; #[cfg(all(feature = "kvm", target_arch = "x86_64"))] let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?; @@ -2314,7 +2348,7 @@ impl Vmm { Some(source_url), Some(prefault), )?; - self.vm = Some(vm); + self.vm = MaybeVmOwnership::Vmm(vm); if self .vm_config @@ -2329,11 +2363,8 @@ impl Vmm { } // Now we can restore the rest of the VM. - if let Some(ref mut vm) = self.vm { - vm.restore() - } else { - Err(VmError::VmNotCreated) - } + // PANIC: won't panic, we just checked that the VM is there. + self.vm.vm_mut().unwrap().restore() } /// Checks the migration result. @@ -2353,7 +2384,7 @@ impl Vmm { .expect("should have joined"); // Give VMM back control. 
- self.vm = Some(vm); + self.vm = MaybeVmOwnership::Vmm(vm); match migration_res { Ok(()) => { @@ -2418,7 +2449,7 @@ impl Vmm { self.vm_reboot().map_err(Error::VmReboot)?; } EpollDispatch::ActivateVirtioDevices => { - if let Some(ref vm) = self.vm { + if let MaybeVmOwnership::Vmm(ref vm) = self.vm { let count = self.activate_evt.read().map_err(Error::EventFdRead)?; info!( "Trying to activate pending virtio devices: count = {}", @@ -2446,7 +2477,7 @@ impl Vmm { // Read from the API receiver channel let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?; - let response = if let Some(ref mut vm) = self.vm { + let response = if let MaybeVmOwnership::Vmm(ref mut vm) = self.vm { vm.debug_request(&gdb_request.payload, gdb_request.cpu_id) } else { Err(VmError::VmNotRunning) @@ -2522,102 +2553,116 @@ impl RequestHandler for Vmm { tracer::start(); info!("Booting VM"); event!("vm", "booting"); - let r = { - trace_scoped!("vm_boot"); - // If we don't have a config, we cannot boot a VM. - if self.vm_config.is_none() { - return Err(VmError::VmMissingConfig); - }; - // console_info is set to None in vm_shutdown. re-populate here if empty - if self.console_info.is_none() { - self.console_info = - Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); - } + if matches!(self.vm, MaybeVmOwnership::Migration) { + return Err(VmError::VmMigrating); + } - // Create a new VM if we don't have one yet. - if self.vm.is_none() { - let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; - let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; - #[cfg(feature = "guest_debug")] - let vm_debug_evt = self - .vm_debug_evt - .try_clone() - .map_err(VmError::EventFdClone)?; - let activate_evt = self - .activate_evt - .try_clone() - .map_err(VmError::EventFdClone)?; - - if let Some(ref vm_config) = self.vm_config { - let vm = Vm::new( - Arc::clone(vm_config), - exit_evt, - reset_evt, - #[cfg(feature = "guest_debug")] - vm_debug_evt, - &self.seccomp_action, - self.hypervisor.clone(), - activate_evt, - self.console_info.clone(), - self.console_resize_pipe.clone(), - Arc::clone(&self.original_termios_opt), - None, - None, - None, - )?; - - self.vm = Some(vm); - } + trace_scoped!("vm_boot"); + // If we don't have a config, we cannot boot a VM. + if self.vm_config.is_none() { + return Err(VmError::VmMissingConfig); + }; + + // console_info is set to None in vm_shutdown. re-populate here if empty + if self.console_info.is_none() { + self.console_info = + Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); + } + + // Create a new VM if we don't have one yet. + if matches!(self.vm, MaybeVmOwnership::None) { + let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; + let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; + #[cfg(feature = "guest_debug")] + let vm_debug_evt = self + .vm_debug_evt + .try_clone() + .map_err(VmError::EventFdClone)?; + let activate_evt = self + .activate_evt + .try_clone() + .map_err(VmError::EventFdClone)?; + + if let Some(ref vm_config) = self.vm_config { + let vm = Vm::new( + Arc::clone(vm_config), + exit_evt, + reset_evt, + #[cfg(feature = "guest_debug")] + vm_debug_evt, + &self.seccomp_action, + self.hypervisor.clone(), + activate_evt, + self.console_info.clone(), + self.console_resize_pipe.clone(), + Arc::clone(&self.original_termios_opt), + None, + None, + None, + )?; + + self.vm = MaybeVmOwnership::Vmm(vm); } + } - // Now we can boot the VM. 
- if let Some(ref mut vm) = self.vm { - vm.boot() - } else { - Err(VmError::VmNotCreated) + // Now we can boot the VM. + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + vm.boot()?; + event!("vm", "booted"); } - }; - tracer::end(); - if r.is_ok() { - event!("vm", "booted"); + MaybeVmOwnership::None => { + return Err(VmError::VmNotCreated); + } + _ => unreachable!(), } - r + + tracer::end(); + Ok(()) } fn vm_pause(&mut self) -> result::Result<(), VmError> { - if let Some(ref mut vm) = self.vm { - vm.pause().map_err(VmError::Pause) - } else { - Err(VmError::VmNotRunning) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => vm.pause().map_err(VmError::Pause), + MaybeVmOwnership::Migration => Err(VmError::VmMigrating)?, + MaybeVmOwnership::None => Err(VmError::VmNotRunning)?, } } fn vm_resume(&mut self) -> result::Result<(), VmError> { - if let Some(ref mut vm) = self.vm { - vm.resume().map_err(VmError::Resume) - } else { - Err(VmError::VmNotRunning) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => vm.resume().map_err(VmError::Resume), + MaybeVmOwnership::Migration => Err(VmError::VmMigrating)?, + MaybeVmOwnership::None => Err(VmError::VmNotRunning)?, } } fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> { - if let Some(ref mut vm) = self.vm { - // Drain console_info so that FDs are not reused - let _ = self.console_info.take(); - vm.snapshot() - .map_err(VmError::Snapshot) - .and_then(|snapshot| { - vm.send(&snapshot, destination_url) - .map_err(VmError::SnapshotSend) - }) - } else { - Err(VmError::VmNotRunning) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + // Drain console_info so that FDs are not reused + let _ = self.console_info.take(); + vm.snapshot() + .map_err(VmError::Snapshot) + .and_then(|snapshot| { + vm.send(&snapshot, destination_url) + .map_err(VmError::SnapshotSend) + }) + } + MaybeVmOwnership::Migration => Err(VmError::VmMigrating)?, + MaybeVmOwnership::None => Err(VmError::VmNotRunning)?, } } fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> { - if self.vm.is_some() || self.vm_config.is_some() { + match &self.vm { + MaybeVmOwnership::Vmm(_vm) => return Err(VmError::VmAlreadyCreated), + MaybeVmOwnership::Migration => return Err(VmError::VmMigrating), + MaybeVmOwnership::None => (), + }; + + if self.vm_config.is_some() { return Err(VmError::VmAlreadyCreated); } @@ -2664,21 +2709,25 @@ impl RequestHandler for Vmm { #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> { - if let Some(ref mut vm) = self.vm { - vm.coredump(destination_url).map_err(VmError::Coredump) - } else { - Err(VmError::VmNotRunning) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + vm.coredump(destination_url).map_err(VmError::Coredump) + } + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => Err(VmError::VmNotRunning), } } fn vm_shutdown(&mut self) -> result::Result<(), VmError> { - let r = if let Some(ref mut vm) = self.vm.take() { - // Drain console_info so that the FDs are not reused - let _ = self.console_info.take(); - vm.shutdown() - } else { - Err(VmError::VmNotRunning) + let vm = match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => vm, + MaybeVmOwnership::Migration => return Err(VmError::VmMigrating), + MaybeVmOwnership::None => return Err(VmError::VmNotRunning), }; + // Drain console_info so that the FDs are not reused + let _ = self.console_info.take(); + let r = 
vm.shutdown(); + self.vm = MaybeVmOwnership::None; if r.is_ok() { event!("vm", "shutdown"); @@ -2691,13 +2740,14 @@ impl RequestHandler for Vmm { event!("vm", "rebooting"); // First we stop the current VM - let config = if let Some(mut vm) = self.vm.take() { - let config = vm.get_config(); - vm.shutdown()?; - config - } else { - return Err(VmError::VmNotCreated); + let vm = match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => vm, + MaybeVmOwnership::Migration => return Err(VmError::VmMigrating), + MaybeVmOwnership::None => return Err(VmError::VmNotRunning), }; + let config = vm.get_config(); + vm.shutdown()?; + self.vm = MaybeVmOwnership::None; // vm.shutdown() closes all the console devices, so set console_info to None // so that the closed FD #s are not reused. @@ -2746,7 +2796,7 @@ impl RequestHandler for Vmm { // And we boot it vm.boot()?; - self.vm = Some(vm); + self.vm = MaybeVmOwnership::Vmm(vm); event!("vm", "rebooted"); @@ -2754,33 +2804,38 @@ impl RequestHandler for Vmm { } fn vm_info(&self) -> result::Result { - match &self.vm_config { - Some(vm_config) => { - let state = match &self.vm { - Some(vm) => vm.get_state()?, - None => VmState::Created, - }; - let config = vm_config.lock().unwrap().clone(); - - let mut memory_actual_size = config.memory.total_size(); - if let Some(vm) = &self.vm { - memory_actual_size -= vm.balloon_size(); - } + let vm_config = self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; + let vm_config = vm_config.lock().unwrap().clone(); + + let state = match &self.vm { + MaybeVmOwnership::Vmm(vm) => vm.get_state()?, + // TODO in theory one could live-migrate a non-running VM .. + MaybeVmOwnership::Migration => VmState::Running, + MaybeVmOwnership::None => VmState::Created, + }; - let device_tree = self - .vm - .as_ref() - .map(|vm| vm.device_tree().lock().unwrap().clone()); - - Ok(VmInfoResponse { - config: Box::new(config), - state, - memory_actual_size, - device_tree, - }) + let mut memory_actual_size = vm_config.memory.total_size(); + match &self.vm { + MaybeVmOwnership::Vmm(vm) => { + memory_actual_size -= vm.balloon_size(); } - None => Err(VmError::VmNotCreated), + MaybeVmOwnership::Migration => {} + MaybeVmOwnership::None => {} } + + let device_tree = match &self.vm { + MaybeVmOwnership::Vmm(vm) => Some(vm.device_tree().lock().unwrap().clone()), + // TODO we need to fix this + MaybeVmOwnership::Migration => None, + MaybeVmOwnership::None => None, + }; + + Ok(VmInfoResponse { + config: Box::new(vm_config), + state, + memory_actual_size, + device_tree, + }) } fn vmm_ping(&self) -> VmmPingResponse { @@ -2802,14 +2857,19 @@ impl RequestHandler for Vmm { return Ok(()); } - // If a VM is booted, we first try to shut it down. - if self.vm.is_some() { - self.vm_shutdown()?; - } - - self.vm_config = None; + match &self.vm { + MaybeVmOwnership::Vmm(_vm) => { + event!("vm", "deleted"); - event!("vm", "deleted"); + // If a VM is booted, we first try to shut it down. 
+ self.vm_shutdown()?; + self.vm_config = None; + } + MaybeVmOwnership::None => { + self.vm_config = None; + } + MaybeVmOwnership::Migration => Err(VmError::VmMigrating)?, + } Ok(()) } @@ -2832,64 +2892,73 @@ impl RequestHandler for Vmm { todo!("doesn't work currently with our thread-local KVM_RUN approach"); } - if let Some(ref mut vm) = self.vm { - if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { - error!("Error when resizing VM: {:?}", e); - Err(e) - } else { - Ok(()) - } - } else { - let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); - if let Some(desired_vcpus) = desired_vcpus { - config.cpus.boot_vcpus = desired_vcpus; - } - if let Some(desired_ram) = desired_ram { - config.memory.size = desired_ram; + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { + error!("Error when resizing VM: {:?}", e); + Err(e) + } else { + Ok(()) + } } - if let Some(desired_balloon) = desired_balloon - && let Some(balloon_config) = &mut config.balloon - { - balloon_config.size = desired_balloon; + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => { + let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); + if let Some(desired_vcpus) = desired_vcpus { + config.cpus.boot_vcpus = desired_vcpus; + } + if let Some(desired_ram) = desired_ram { + config.memory.size = desired_ram; + } + if let Some(desired_balloon) = desired_balloon + && let Some(balloon_config) = &mut config.balloon + { + balloon_config.size = desired_balloon; + } + + Ok(()) } - Ok(()) } } fn vm_resize_disk(&mut self, id: String, desired_size: u64) -> result::Result<(), VmError> { self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; - if let Some(ref mut vm) = self.vm { - return vm.resize_disk(id, desired_size); + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => vm.resize_disk(id, desired_size), + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => Err(VmError::ResizeDisk), } - - Err(VmError::ResizeDisk) } fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> { self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; - if let Some(ref mut vm) = self.vm { - if let Err(e) = vm.resize_zone(id, desired_ram) { - error!("Error when resizing VM: {:?}", e); - Err(e) - } else { - Ok(()) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + if let Err(e) = vm.resize_zone(id, desired_ram) { + error!("Error when resizing VM: {:?}", e); + Err(e) + } else { + Ok(()) + } } - } else { - // Update VmConfig by setting the new desired ram. - let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory; - - if let Some(zones) = &mut memory_config.zones { - for zone in zones.iter_mut() { - if zone.id == id { - zone.size = desired_ram; - return Ok(()); + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => { + // Update VmConfig by setting the new desired ram. 
+ let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory; + + if let Some(zones) = &mut memory_config.zones { + for zone in zones.iter_mut() { + if zone.id == id { + zone.size = desired_ram; + return Ok(()); + } } } - } - error!("Could not find the memory zone {} for the resize", id); - Err(VmError::ResizeZone) + error!("Could not find the memory zone {} for the resize", id); + Err(VmError::ResizeZone) + } } } @@ -2906,19 +2975,23 @@ impl RequestHandler for Vmm { config.validate().map_err(VmError::ConfigValidation)?; } - if let Some(ref mut vm) = self.vm { - let info = vm.add_device(device_cfg).map_err(|e| { - error!("Error when adding new device to the VM: {:?}", e); - e - })?; - serde_json::to_vec(&info) - .map(Some) - .map_err(VmError::SerializeJson) - } else { - // Update VmConfig by adding the new device. - let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); - add_to_config(&mut config.devices, device_cfg); - Ok(None) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + let info = vm.add_device(device_cfg).map_err(|e| { + error!("Error when adding new device to the VM: {:?}", e); + e + })?; + serde_json::to_vec(&info) + .map(Some) + .map_err(VmError::SerializeJson) + } + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => { + // Update VmConfig by adding the new device. + let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); + add_to_config(&mut config.devices, device_cfg); + Ok(None) + } } } @@ -2935,39 +3008,49 @@ impl RequestHandler for Vmm { config.validate().map_err(VmError::ConfigValidation)?; } - if let Some(ref mut vm) = self.vm { - let info = vm.add_user_device(device_cfg).map_err(|e| { - error!("Error when adding new user device to the VM: {:?}", e); - e - })?; - serde_json::to_vec(&info) - .map(Some) - .map_err(VmError::SerializeJson) - } else { - // Update VmConfig by adding the new device. - let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); - add_to_config(&mut config.user_devices, device_cfg); - Ok(None) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + let info = vm.add_user_device(device_cfg).map_err(|e| { + error!("Error when adding new user device to the VM: {:?}", e); + e + })?; + serde_json::to_vec(&info) + .map(Some) + .map_err(VmError::SerializeJson) + } + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => { + // Update VmConfig by adding the new device. 
+ let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); + add_to_config(&mut config.user_devices, device_cfg); + Ok(None) + } } } fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> { - if let Some(ref mut vm) = self.vm { - if let Err(e) = vm.remove_device(id) { - error!("Error when removing device from the VM: {:?}", e); - Err(e) - } else { - Ok(()) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + if let Err(e) = vm.remove_device(id) { + error!("Error when removing device from the VM: {:?}", e); + Err(e) + } else { + Ok(()) + } } - } else if let Some(ref config) = self.vm_config { - let mut config = config.lock().unwrap(); - if config.remove_device(&id) { - Ok(()) - } else { - Err(VmError::NoDeviceToRemove(id)) + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => { + if let Some(ref config) = self.vm_config { + let mut config = config.lock().unwrap(); + if config.remove_device(&id) { + Ok(()) + } else { + Err(VmError::NoDeviceToRemove(id)) + } + } else { + Err(VmError::VmNotCreated) + } } - } else { - Err(VmError::VmNotCreated) } } @@ -2981,19 +3064,23 @@ impl RequestHandler for Vmm { config.validate().map_err(VmError::ConfigValidation)?; } - if let Some(ref mut vm) = self.vm { - let info = vm.add_disk(disk_cfg).map_err(|e| { - error!("Error when adding new disk to the VM: {:?}", e); - e - })?; - serde_json::to_vec(&info) - .map(Some) - .map_err(VmError::SerializeJson) - } else { - // Update VmConfig by adding the new device. - let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); - add_to_config(&mut config.disks, disk_cfg); - Ok(None) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + let info = vm.add_disk(disk_cfg).map_err(|e| { + error!("Error when adding new disk to the VM: {:?}", e); + e + })?; + serde_json::to_vec(&info) + .map(Some) + .map_err(VmError::SerializeJson) + } + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => { + // Update VmConfig by adding the new device. + let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); + add_to_config(&mut config.disks, disk_cfg); + Ok(None) + } } } @@ -3007,19 +3094,23 @@ impl RequestHandler for Vmm { config.validate().map_err(VmError::ConfigValidation)?; } - if let Some(ref mut vm) = self.vm { - let info = vm.add_fs(fs_cfg).map_err(|e| { - error!("Error when adding new fs to the VM: {:?}", e); - e - })?; - serde_json::to_vec(&info) - .map(Some) - .map_err(VmError::SerializeJson) - } else { - // Update VmConfig by adding the new device. - let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); - add_to_config(&mut config.fs, fs_cfg); - Ok(None) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + let info = vm.add_fs(fs_cfg).map_err(|e| { + error!("Error when adding new fs to the VM: {:?}", e); + e + })?; + serde_json::to_vec(&info) + .map(Some) + .map_err(VmError::SerializeJson) + } + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => { + // Update VmConfig by adding the new device. 
+ let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); + add_to_config(&mut config.fs, fs_cfg); + Ok(None) + } } } @@ -3033,19 +3124,23 @@ impl RequestHandler for Vmm { config.validate().map_err(VmError::ConfigValidation)?; } - if let Some(ref mut vm) = self.vm { - let info = vm.add_pmem(pmem_cfg).map_err(|e| { - error!("Error when adding new pmem device to the VM: {:?}", e); - e - })?; - serde_json::to_vec(&info) - .map(Some) - .map_err(VmError::SerializeJson) - } else { - // Update VmConfig by adding the new device. - let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); - add_to_config(&mut config.pmem, pmem_cfg); - Ok(None) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + let info = vm.add_pmem(pmem_cfg).map_err(|e| { + error!("Error when adding new pmem device to the VM: {:?}", e); + e + })?; + serde_json::to_vec(&info) + .map(Some) + .map_err(VmError::SerializeJson) + } + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => { + // Update VmConfig by adding the new device. + let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); + add_to_config(&mut config.pmem, pmem_cfg); + Ok(None) + } } } @@ -3059,19 +3154,23 @@ impl RequestHandler for Vmm { config.validate().map_err(VmError::ConfigValidation)?; } - if let Some(ref mut vm) = self.vm { - let info = vm.add_net(net_cfg).map_err(|e| { - error!("Error when adding new network device to the VM: {:?}", e); - e - })?; - serde_json::to_vec(&info) - .map(Some) - .map_err(VmError::SerializeJson) - } else { - // Update VmConfig by adding the new device. - let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); - add_to_config(&mut config.net, net_cfg); - Ok(None) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + let info = vm.add_net(net_cfg).map_err(|e| { + error!("Error when adding new network device to the VM: {:?}", e); + e + })?; + serde_json::to_vec(&info) + .map(Some) + .map_err(VmError::SerializeJson) + } + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => { + // Update VmConfig by adding the new device. + let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); + add_to_config(&mut config.net, net_cfg); + Ok(None) + } } } @@ -3085,19 +3184,23 @@ impl RequestHandler for Vmm { config.validate().map_err(VmError::ConfigValidation)?; } - if let Some(ref mut vm) = self.vm { - let info = vm.add_vdpa(vdpa_cfg).map_err(|e| { - error!("Error when adding new vDPA device to the VM: {:?}", e); - e - })?; - serde_json::to_vec(&info) - .map(Some) - .map_err(VmError::SerializeJson) - } else { - // Update VmConfig by adding the new device. - let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); - add_to_config(&mut config.vdpa, vdpa_cfg); - Ok(None) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + let info = vm.add_vdpa(vdpa_cfg).map_err(|e| { + error!("Error when adding new vDPA device to the VM: {:?}", e); + e + })?; + serde_json::to_vec(&info) + .map(Some) + .map_err(VmError::SerializeJson) + } + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => { + // Update VmConfig by adding the new device. 
+ let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); + add_to_config(&mut config.vdpa, vdpa_cfg); + Ok(None) + } } } @@ -3116,49 +3219,55 @@ impl RequestHandler for Vmm { config.validate().map_err(VmError::ConfigValidation)?; } - if let Some(ref mut vm) = self.vm { - let info = vm.add_vsock(vsock_cfg).map_err(|e| { - error!("Error when adding new vsock device to the VM: {:?}", e); - e - })?; - serde_json::to_vec(&info) - .map(Some) - .map_err(VmError::SerializeJson) - } else { - // Update VmConfig by adding the new device. - let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); - config.vsock = Some(vsock_cfg); - Ok(None) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + let info = vm.add_vsock(vsock_cfg).map_err(|e| { + error!("Error when adding new vsock device to the VM: {:?}", e); + e + })?; + serde_json::to_vec(&info) + .map(Some) + .map_err(VmError::SerializeJson) + } + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => { + // Update VmConfig by adding the new device. + let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); + config.vsock = Some(vsock_cfg); + Ok(None) + } } } fn vm_counters(&mut self) -> result::Result>, VmError> { - if let Some(ref mut vm) = self.vm { - let info = vm.counters().map_err(|e| { - error!("Error when getting counters from the VM: {:?}", e); - e - })?; - serde_json::to_vec(&info) - .map(Some) - .map_err(VmError::SerializeJson) - } else { - Err(VmError::VmNotRunning) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => { + let info = vm.counters().map_err(|e| { + error!("Error when getting counters from the VM: {:?}", e); + e + })?; + serde_json::to_vec(&info) + .map(Some) + .map_err(VmError::SerializeJson) + } + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => Err(VmError::VmNotRunning), } } fn vm_power_button(&mut self) -> result::Result<(), VmError> { - if let Some(ref mut vm) = self.vm { - vm.power_button() - } else { - Err(VmError::VmNotRunning) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => vm.power_button(), + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => Err(VmError::VmNotRunning), } } fn vm_nmi(&mut self) -> result::Result<(), VmError> { - if let Some(ref mut vm) = self.vm { - vm.nmi() - } else { - Err(VmError::VmNotRunning) + match self.vm { + MaybeVmOwnership::Vmm(ref mut vm) => vm.nmi(), + MaybeVmOwnership::Migration => Err(VmError::VmMigrating), + MaybeVmOwnership::None => Err(VmError::VmNotRunning), } } @@ -3204,7 +3313,7 @@ impl RequestHandler for Vmm { } if let ReceiveMigrationState::Aborted = state { - self.vm = None; + self.vm = MaybeVmOwnership::None; self.vm_config = None; } @@ -3215,14 +3324,23 @@ impl RequestHandler for Vmm { &mut self, send_data_migration: VmSendMigrationData, ) -> result::Result<(), MigratableError> { + match self.vm { + MaybeVmOwnership::Vmm(_) => (), + MaybeVmOwnership::Migration => { + return Err(MigratableError::MigrateSend(anyhow!( + "There is already an ongoing migration" + ))); + } + MaybeVmOwnership::None => { + return Err(MigratableError::MigrateSend(anyhow!("VM is not running"))); + } + }; + info!( "Sending migration: destination_url = {}, local = {}", send_data_migration.destination_url, send_data_migration.local ); - // TODO Check if there is already a migration in progress - // will be done in next commit - if !self .vm_config .as_ref() @@ -3239,10 +3357,7 @@ impl RequestHandler for Vmm { // Take VM ownership. 
This also means that API events can no longer // change the VM (e.g. net device hotplug). - let vm = self - .vm - .take() - .ok_or(MigratableError::MigrateSend(anyhow!("VM is not running")))?; + let vm = self.vm.take_vm_for_migration(); // Start migration thread let worker = MigrationWorker { diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 6e325d9c46..a776d0d943 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -177,6 +177,9 @@ pub enum Error { #[error("VM is not running")] VmNotRunning, + #[error("VM is currently migrating and can't be modified")] + VmMigrating, + #[error("Cannot clone EventFd")] EventFdClone(#[source] io::Error), From 9df48f73e9b22dd4b2049628ad5c1a046adcca4d Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Thu, 30 Oct 2025 12:23:25 +0100 Subject: [PATCH 256/294] vmm: api: temporarily make VmSendMigration call blocking again Once we have a mechanism to query the progress of an ongoing live-migration, we can remove this workaround. Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- vmm/src/api/http/http_endpoint.rs | 54 +++++++++++++++++++++++++++++-- vmm/src/lib.rs | 25 +++++++++++++- vmm/src/seccomp_filters.rs | 1 + 3 files changed, 77 insertions(+), 3 deletions(-) diff --git a/vmm/src/api/http/http_endpoint.rs b/vmm/src/api/http/http_endpoint.rs index 0daf7f7af5..35ef0ba946 100644 --- a/vmm/src/api/http/http_endpoint.rs +++ b/vmm/src/api/http/http_endpoint.rs @@ -32,11 +32,22 @@ use std::fs::File; use std::os::unix::io::IntoRawFd; -use std::sync::mpsc::Sender; +use std::sync::mpsc::{Receiver, Sender, SyncSender}; +use std::sync::{LazyLock, Mutex}; use micro_http::{Body, Method, Request, Response, StatusCode, Version}; use vmm_sys_util::eventfd::EventFd; +/// Helper to make the VmSendMigration call blocking as long as a migration is ongoing. +#[allow(clippy::type_complexity)] +pub static ONGOING_LIVEMIGRATION: LazyLock<( + SyncSender>, + Mutex>>, +)> = LazyLock::new(|| { + let (sender, receiver) = std::sync::mpsc::sync_channel(0); + (sender, Mutex::new(receiver)) +}); + #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use crate::api::VmCoredump; use crate::api::http::{EndpointHandler, HttpError, error_response}; @@ -230,7 +241,6 @@ vm_action_put_handler_body!(VmRemoveDevice); vm_action_put_handler_body!(VmResizeDisk); vm_action_put_handler_body!(VmResizeZone); vm_action_put_handler_body!(VmSnapshot); -vm_action_put_handler_body!(VmSendMigration); #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] vm_action_put_handler_body!(VmCoredump); @@ -382,6 +392,46 @@ impl PutHandler for VmReceiveMigration { impl GetHandler for VmReceiveMigration {} +// Special Handling for virtio-net Devices Backed by Network File Descriptors +// +// See above. 
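+//
+// Note on blocking behavior: after forwarding the VmSendMigration request to the VMM
+// thread, the handler below waits on the ONGOING_LIVEMIGRATION rendezvous channel until
+// the migration worker reports success or failure, and only then answers the HTTP request.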
+impl PutHandler for VmSendMigration { + fn handle_request( + &'static self, + api_notifier: EventFd, + api_sender: Sender, + body: &Option, + _files: Vec, + ) -> std::result::Result, HttpError> { + if let Some(body) = body { + let res = self + .send( + api_notifier, + api_sender, + serde_json::from_slice(body.raw())?, + ) + .map_err(HttpError::ApiError)?; + + info!("live migration started"); + + let (_, receiver) = &*ONGOING_LIVEMIGRATION; + + info!("waiting for live migration result"); + let mig_res = receiver.lock().unwrap().recv().unwrap(); + info!("received live migration result"); + + // We forward the migration error here to the guest + mig_res + .map(|_| res) + .map_err(|e| HttpError::ApiError(ApiError::VmSendMigration(e))) + } else { + Err(HttpError::BadRequest) + } + } +} + +impl GetHandler for VmSendMigration {} + impl PutHandler for VmResize { fn handle_request( &'static self, diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 3eb0648ba2..7ec33a23df 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -64,6 +64,7 @@ use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::signal::unblock_signal; use vmm_sys_util::sock_ctrl_msg::ScmSocket; +use crate::api::http::http_endpoint::ONGOING_LIVEMIGRATION; use crate::api::{ ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData, VmSendMigrationData, VmmPingResponse, @@ -2388,6 +2389,13 @@ impl Vmm { match migration_res { Ok(()) => { + { + info!("Sending Receiver in HTTP thread that migration succeeded"); + let (sender, _) = &*ONGOING_LIVEMIGRATION; + // unblock API call; propagate migration result + sender.send(Ok(())).unwrap(); + } + // Shutdown the VM after the migration succeeded if let Err(e) = self.exit_evt.write(1) { error!("Failed shutting down the VM after migration: {}", e); @@ -2395,6 +2403,13 @@ impl Vmm { } Err(e) => { error!("Migration failed: {}", e); + { + info!("Sending Receiver in HTTP thread that migration failed"); + let (sender, _) = &*ONGOING_LIVEMIGRATION; + // unblock API call; propagate migration result + sender.send(Err(e)).unwrap(); + } + // we don't fail the VMM here, it just continues running its VM } } } @@ -2922,10 +2937,18 @@ impl RequestHandler for Vmm { } fn vm_resize_disk(&mut self, id: String, desired_size: u64) -> result::Result<(), VmError> { + info!("request to resize disk: id={id}"); self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; match self.vm { - MaybeVmOwnership::Vmm(ref mut vm) => vm.resize_disk(id, desired_size), + MaybeVmOwnership::Vmm(ref mut vm) => { + if let Err(e) = vm.resize_disk(id, desired_size) { + error!("Error when resizing disk: {:?}", e); + Err(e) + } else { + Ok(()) + } + } MaybeVmOwnership::Migration => Err(VmError::VmMigrating), MaybeVmOwnership::None => Err(VmError::ResizeDisk), } diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index 948d978dda..f0bc1e1ef4 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -865,6 +865,7 @@ fn http_api_thread_rules() -> Result)>, BackendError> (libc::SYS_write, vec![]), (libc::SYS_rt_sigprocmask, vec![]), (libc::SYS_getcwd, vec![]), + (libc::SYS_clock_nanosleep, vec![]), ]) } From dc905d98b7c9903a86f78a31754effe9548d335b Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 11 Nov 2025 14:21:11 +0100 Subject: [PATCH 257/294] misc: fix typo and wording Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- vm-migration/src/lib.rs | 2 +- vmm/src/api/mod.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/vm-migration/src/lib.rs b/vm-migration/src/lib.rs index 2e5f6e5791..07f409f8d0 100644 --- a/vm-migration/src/lib.rs +++ b/vm-migration/src/lib.rs @@ -50,7 +50,7 @@ pub enum MigratableError { #[error("Failed to complete migration for migratable component")] CompleteMigration(#[source] anyhow::Error), - #[error("Failed to release a disk lock before the migration")] + #[error("Failed to release a disk lock")] UnlockError(#[source] anyhow::Error), } diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index 0b0be98b86..c499c74c76 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -118,8 +118,8 @@ pub enum ApiError { #[error("The VM could not be snapshotted")] VmSnapshot(#[source] VmError), - /// The VM could not restored. - #[error("The VM could not restored")] + /// The VM could not be restored. + #[error("The VM could not be restored")] VmRestore(#[source] VmError), /// The VM could not be coredumped. From a8ae332c884007df2953196d18d35d7544c13f4a Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Mon, 1 Dec 2025 15:48:45 +0100 Subject: [PATCH 258/294] ch-remote: fix panic in send-migration clap stores the `connections` argument as a `u32`, but it was read back as a `NonZeroU32`, and clap panics on such a definition/access mismatch. Read the value as `u32` and convert it to `NonZeroU32` explicitly instead. Original error: ``` thread 'main' (66708) panicked at src/bin/ch-remote.rs:512:22: Mismatch between definition and access of `connections`. Could not downcast to core::num::nonzero::NonZero, need to downcast to u32 ``` Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- src/bin/ch-remote.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs index 92c63900f2..74bcd80fb7 100644 --- a/src/bin/ch-remote.rs +++ b/src/bin/ch-remote.rs @@ -506,11 +506,13 @@ fn rest_api_do_command(matches: &ArgMatches, socket: &mut UnixStream) -> ApiResu .unwrap() .get_one::("migration-timeout-s") .unwrap_or(&3600), - *matches + matches .subcommand_matches("send-migration") .unwrap() - .get_one::("connections") - .unwrap_or(&NonZeroU32::new(1).unwrap()), + .get_one::("connections") + .copied() + .and_then(NonZeroU32::new) + .unwrap_or(NonZeroU32::new(1).unwrap()), ); simple_api_command(socket, "PUT", "send-migration", Some(&send_migration_data)) .map_err(Error::HttpApiClient) From 179896c81064f52dd45d4a631bbf43ef3687970b Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Mon, 1 Dec 2025 12:31:06 +0100 Subject: [PATCH 259/294] hypervisor: Introduce error associated with AMX related state components In order for guests to use AMX it is necessary to ask the kernel to enable the related state components for guests. While cloud hypervisor already does this, we would prefer to extract the logic into a stand alone (reusable) function. In this commit we only introduce the error type that will later be part of the enable_amx_state_components function's signature.
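For illustration, this is roughly the shape the new function will take; treat it as a sketch, the exact signature and body land with the later commits of this series:

```rust
// Sketch only: the real implementation is added in later commits of this series.
pub fn enable_amx_state_components(
    hypervisor: &dyn Hypervisor,
) -> Result<(), AmxGuestSupportError> {
    // 1. Verify that the host CPU vendor and kernel expose AMX tile state components.
    // 2. Ask the kernel (via arch_prctl) to permit TILECFG/TILEDATA for guests.
    Ok(())
}
```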
Signed-Off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- hypervisor/src/arch/x86/mod.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/hypervisor/src/arch/x86/mod.rs b/hypervisor/src/arch/x86/mod.rs index c337624621..6b5502757d 100644 --- a/hypervisor/src/arch/x86/mod.rs +++ b/hypervisor/src/arch/x86/mod.rs @@ -13,6 +13,8 @@ use core::fmt; +use thiserror::Error; + #[cfg(all(feature = "mshv_emulator", target_arch = "x86_64"))] pub mod emulator; pub mod gdt; @@ -306,6 +308,26 @@ pub struct MsrEntry { pub data: u64, } +/// Error that may be returned when attempting to enable AMX state components for guests +#[derive(Debug, Error)] +pub enum AmxGuestSupportError { + /// Attempted to enable AMX on a CPU from a vendor that is not known to support AMX features. + #[error("The host CPU's vendor does not support AMX features. Only Intel provides such CPUs.")] + VendorDoesNotSupportAmx, + /// Unable to verify that the host supports AMX. + #[error("The host does not support AMX tile state components: errno={errno}")] + AmxNotSupported { errno: i64 }, + /// The syscall to check for AMX tile state support succeeded, but the returned + /// features did not match our expectations. + #[error( + "Could not verify AMX support. These are the supported features that were reported: features={features}" + )] + InvalidAmxTileFeatureCheck { features: usize }, + /// The request to enable AMX related state components for guests failed. + #[error("Failed to enable AMX tile state components for guests: errno={errno}")] + AmxGuestTileRequest { errno: i64 }, +} + #[serde_with::serde_as] #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct XsaveState { From 0692b17c4c8f60a5f9e72d1f2a0e970eb9011552 Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Mon, 1 Dec 2025 13:25:54 +0100 Subject: [PATCH 260/294] hypervisor: Introduce enable_amx_state_components function We introduce a static enable_amx_state_components method on the XSaveState struct that will be used in a follow up commit. We will also extend the logic of what this method does in the near future. Signed-Off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- hypervisor/src/arch/x86/mod.rs | 86 ++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/hypervisor/src/arch/x86/mod.rs b/hypervisor/src/arch/x86/mod.rs index 6b5502757d..6faf610769 100644 --- a/hypervisor/src/arch/x86/mod.rs +++ b/hypervisor/src/arch/x86/mod.rs @@ -13,6 +13,8 @@ use core::fmt; +use crate::{CpuVendor, Hypervisor}; + use thiserror::Error; #[cfg(all(feature = "mshv_emulator", target_arch = "x86_64"))] @@ -335,6 +337,90 @@ pub struct XsaveState { pub region: [u32; 1024usize], } +impl XsaveState { + const ARCH_GET_XCOMP_SUPP: usize = 0x1021; + const ARCH_REQ_XCOMP_GUEST_PERM: usize = 0x1025; + const ARCH_XCOMP_TILECFG: usize = 17; + const ARCH_XCOMP_TILEDATA: usize = 18; + + /// This function enables the AMX related TILECFG and TILEDATA state components for guests. + /// + /// # Background + /// AMX uses a concept of tiles which are small 2D blocks of data stored in registers on the CPU, + /// where the TILECFG state component defines the shape and size of each tile (rows and columns), + /// and the TILEDATA state component holds the actual elements of these tiles used by matrix operations. 
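+    ///
+    /// Enabling AMX for guests therefore boils down to two steps: verifying that the host
+    /// CPU and kernel expose the TILECFG/TILEDATA state components, and then asking the
+    /// kernel (via `arch_prctl`) to permit these components for guest use.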
+ pub fn enable_amx_state_components( + hypervisor: &dyn Hypervisor, + ) -> Result<(), AmxGuestSupportError> { + Self::amx_supported(hypervisor)?; + + Self::request_guest_amx_support()?; + + Ok(()) + } + + /// Checks whether the host supports AMX. + /// + /// The `hypervisor` is used to inform us about the + /// CPU vendor (AMX is currently only available on Intel CPUs). + /// + /// Returns `Ok` if AMX is supported on the host and `Err` otherwise. + fn amx_supported(hypervisor: &dyn Hypervisor) -> Result<(), AmxGuestSupportError> { + if !matches!(hypervisor.get_cpu_vendor(), CpuVendor::Intel) { + return Err(AmxGuestSupportError::VendorDoesNotSupportAmx); + } + // We make a syscall to get information about which dynamically enabled + // XSAVE state components are supported. The corresponding state + // component bits will get set in `features` + let mut features: usize = 0; + // SAFETY: Syscall with valid parameters + let result = unsafe { + libc::syscall( + libc::SYS_arch_prctl, + Self::ARCH_GET_XCOMP_SUPP, + &raw mut features, + ) + }; + // Ensure that both the TILECFG and TILEDATA state components are supported + let mask = (1 << Self::ARCH_XCOMP_TILECFG) | (1 << Self::ARCH_XCOMP_TILEDATA); + if result != 0 { + return Err(AmxGuestSupportError::AmxNotSupported { errno: result }); + } + + if (features & mask) == mask { + Ok(()) + } else { + Err(AmxGuestSupportError::InvalidAmxTileFeatureCheck { features }) + } + } + + /// Asks the kernel to provide AMX support for guests. + fn request_guest_amx_support() -> Result<(), AmxGuestSupportError> { + // Make a syscall to request permission for guests to use the TILECFG + // and TILEDATA state components. Note that as per the kernel + // [documentation](https://docs.kernel.org/arch/x86/xstate.html#dynamic-features-for-virtual-machines) + // we need to pass in the number of the highest XSTATE component which is required for + // the facility to work which in this case is TILEDATA. + // + // This syscall will alter the size of `kvm_xsave` when KVM is used as the hypervisor. + // + // SAFETY: Syscall with valid parameters + let result = unsafe { + libc::syscall( + libc::SYS_arch_prctl, + Self::ARCH_REQ_XCOMP_GUEST_PERM, + Self::ARCH_XCOMP_TILEDATA, + ) + }; + if result == 0 { + Ok(()) + } else { + // Unwrap is OK because we verified that `result` is not zero + Err(AmxGuestSupportError::AmxGuestTileRequest { errno: result }) + } + } +} + impl Default for XsaveState { fn default() -> Self { // SAFETY: this is plain old data structure From 462be476355e6d1e0bc0ec7a164355cdbc8a06bc Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Mon, 1 Dec 2025 13:40:21 +0100 Subject: [PATCH 261/294] vmm: Use XsaveState::enable_amx_state_components We use the central amx_state_components enabling function in the CpuManager constructor. This way we can make changes to the AMX related state components functionality without needing to update the CpuManager's constructor. 
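With this change the AMX setup in the CpuManager constructor shrinks to a single call; the snippet below mirrors the diff and is shown only to illustrate the new call-site shape:

```rust
#[cfg(target_arch = "x86_64")]
if config.features.amx {
    // Vendor/kernel checks and the arch_prctl request now live in the hypervisor crate.
    hypervisor::arch::x86::XsaveState::enable_amx_state_components(hypervisor.as_ref())
        .map_err(|e| crate::cpu::Error::AmxEnable(e.into()))?;
}
```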
Signed-Off-by: Oliver Anderson On-behalf-of: SAP --- vmm/src/cpu.rs | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index d34b151ddd..ebdfbf478c 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -738,34 +738,8 @@ impl CpuManager { #[cfg(target_arch = "x86_64")] if config.features.amx { - const ARCH_GET_XCOMP_GUEST_PERM: usize = 0x1024; - const ARCH_REQ_XCOMP_GUEST_PERM: usize = 0x1025; - const XFEATURE_XTILEDATA: usize = 18; - const XFEATURE_XTILEDATA_MASK: usize = 1 << XFEATURE_XTILEDATA; - - // SAFETY: the syscall is only modifying kernel internal - // data structures that the kernel is itself expected to safeguard. - let amx_tile = unsafe { - libc::syscall( - libc::SYS_arch_prctl, - ARCH_REQ_XCOMP_GUEST_PERM, - XFEATURE_XTILEDATA, - ) - }; - - if amx_tile != 0 { - return Err(Error::AmxEnable(anyhow!("Guest AMX usage not supported"))); - } else { - let mask: usize = 0; - // SAFETY: the mask being modified (not marked mutable as it is - // modified in unsafe only which is permitted) isn't in use elsewhere. - let result = unsafe { - libc::syscall(libc::SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &mask) - }; - if result != 0 || (mask & XFEATURE_XTILEDATA_MASK) != XFEATURE_XTILEDATA_MASK { - return Err(Error::AmxEnable(anyhow!("Guest AMX usage not supported"))); - } - } + hypervisor::arch::x86::XsaveState::enable_amx_state_components(hypervisor.as_ref()) + .map_err(|e| crate::cpu::Error::AmxEnable(e.into()))?; } let proximity_domain_per_cpu: BTreeMap = { From 429bda0db11ab3e22bab4f41ac313f2369c59ade Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Mon, 1 Dec 2025 14:05:21 +0100 Subject: [PATCH 262/294] hypervisor: check_extension_int method on the hypervisor trait We will need to query KVM for the size of the xsave struct in a follow up commit. This commit introduces the necessary method on the hypervisor trait to do that. Signed-Off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- hypervisor/src/hypervisor.rs | 10 ++++++++++ hypervisor/src/kvm/mod.rs | 4 ++++ hypervisor/src/mshv/mod.rs | 6 ++++++ 3 files changed, 20 insertions(+) diff --git a/hypervisor/src/hypervisor.rs b/hypervisor/src/hypervisor.rs index 13d00fa009..1974b02861 100644 --- a/hypervisor/src/hypervisor.rs +++ b/hypervisor/src/hypervisor.rs @@ -111,6 +111,16 @@ pub trait Hypervisor: Send + Sync { /// Return a hypervisor-agnostic Vm trait object /// fn create_vm(&self) -> Result>; + + /// Query the hypervisor for the availability of an extension. + /// + /// + /// Generally 0 means no and 1 means yes, but some extensions may report + /// additional information in the integer return value. 
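+    /// For example, querying `KVM_CAP_XSAVE2` does not just report availability: its
+    /// return value is the size in bytes of the buffer expected by `KVM_GET_XSAVE2`.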
+ /// + #[cfg(feature = "kvm")] + fn check_extension_int(&self, capability: kvm_ioctls::Cap) -> i32; + /// /// Create a Vm of a specific type using the underlying hypervisor /// Return a hypervisor-agnostic Vm trait object diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 8d1bc228f1..886d4dc4d7 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -1281,6 +1281,10 @@ impl hypervisor::Hypervisor for KvmHypervisor { self.create_vm_with_type(vm_type) } + fn check_extension_int(&self, capability: kvm_ioctls::Cap) -> i32 { + self.kvm.check_extension_int(capability) + } + fn check_required_extensions(&self) -> hypervisor::Result<()> { check_required_kvm_extensions(&self.kvm) .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into())) diff --git a/hypervisor/src/mshv/mod.rs b/hypervisor/src/mshv/mod.rs index 294dee2e7d..39a47fb733 100644 --- a/hypervisor/src/mshv/mod.rs +++ b/hypervisor/src/mshv/mod.rs @@ -427,6 +427,12 @@ impl hypervisor::Hypervisor for MshvHypervisor { let vm_type = 0; self.create_vm_with_type(vm_type) } + + #[cfg(feature = "kvm")] + fn check_extension_int(&self, _capability: kvm_ioctls::Cap) -> i32 { + unimplemented!() + } + #[cfg(target_arch = "x86_64")] /// /// Get the supported CpuID From 653c341d924fc1b3a039612ff9ff856377a0338e Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Mon, 1 Dec 2025 14:49:53 +0100 Subject: [PATCH 263/294] hypervisor: Fix live migration when AMX is configured AMX requires dynamically enabling certain large state components which leads to an increase in the size of kvm_xsave. This was not taken into account by Cloud hypervisor until now. We solve this by refactoring `XSaveState` to directly wrap `kvm::Xsave` and always ensure (via a OnceLocked static variable) that all operations on the wrapped xsave state obtain an instance of the right size. Signed-Off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- hypervisor/Cargo.toml | 5 ++- hypervisor/src/arch/x86/mod.rs | 68 +++++++++++++++++++++++++------- hypervisor/src/kvm/mod.rs | 27 +++++-------- hypervisor/src/kvm/x86_64/mod.rs | 15 ------- 4 files changed, 67 insertions(+), 48 deletions(-) diff --git a/hypervisor/Cargo.toml b/hypervisor/Cargo.toml index 73b2cd612a..6fb6a02044 100644 --- a/hypervisor/Cargo.toml +++ b/hypervisor/Cargo.toml @@ -21,7 +21,10 @@ cfg-if = { workspace = true } concat-idents = "1.1.5" igvm = { workspace = true, optional = true } igvm_defs = { workspace = true, optional = true } -kvm-bindings = { workspace = true, optional = true, features = ["serde"] } +kvm-bindings = { workspace = true, optional = true, features = [ + "fam-wrappers", + "serde", +] } kvm-ioctls = { workspace = true, optional = true } libc = { workspace = true } log = { workspace = true } diff --git a/hypervisor/src/arch/x86/mod.rs b/hypervisor/src/arch/x86/mod.rs index 6faf610769..56d1e98a24 100644 --- a/hypervisor/src/arch/x86/mod.rs +++ b/hypervisor/src/arch/x86/mod.rs @@ -12,11 +12,13 @@ // use core::fmt; - -use crate::{CpuVendor, Hypervisor}; +#[cfg(feature = "kvm")] +use std::sync::OnceLock; use thiserror::Error; +use crate::{CpuVendor, Hypervisor}; + #[cfg(all(feature = "mshv_emulator", target_arch = "x86_64"))] pub mod emulator; pub mod gdt; @@ -330,12 +332,15 @@ pub enum AmxGuestSupportError { AmxGuestTileRequest { errno: i64 }, } -#[serde_with::serde_as] +/// The length of the XSAVE flexible array member (FAM). +/// This length increases when arch_prctl is utilized to dynamically add state components. 
+/// +/// IMPORTANT: This static should only be updated via methods on [`XsaveState`]. +#[cfg(feature = "kvm")] +static XSAVE_FAM_LENGTH: OnceLock = OnceLock::new(); + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct XsaveState { - #[serde_as(as = "[_; 1024usize]")] - pub region: [u32; 1024usize], -} +pub struct XsaveState(#[cfg(feature = "kvm")] pub(crate) kvm_bindings::Xsave); impl XsaveState { const ARCH_GET_XCOMP_SUPP: usize = 0x1021; @@ -343,6 +348,27 @@ impl XsaveState { const ARCH_XCOMP_TILECFG: usize = 17; const ARCH_XCOMP_TILEDATA: usize = 18; + /// Construct an instance via the given initializer. + /// + /// As long as dynamically enabled state components have only been enabled + /// through static methods on this struct it is guaranteed that the + /// initialization routine is given an Xsave struct of the expected size. + #[cfg(feature = "kvm")] + pub(crate) fn with_initializer( + mut init: F, + ) -> Result> + where + F: FnMut(&mut kvm_bindings::Xsave) -> Result<(), E>, + E: Into>, + { + let fam_length = XSAVE_FAM_LENGTH.get().unwrap_or(&0); + + let mut xsave = kvm_bindings::Xsave::new(*fam_length)?; + + init(&mut xsave).map_err(Into::into)?; + Ok(Self(xsave)) + } + /// This function enables the AMX related TILECFG and TILEDATA state components for guests. /// /// # Background @@ -353,9 +379,28 @@ impl XsaveState { hypervisor: &dyn Hypervisor, ) -> Result<(), AmxGuestSupportError> { Self::amx_supported(hypervisor)?; - Self::request_guest_amx_support()?; + // If we are using the KVM hypervisor we meed to query for the new xsave2 size and update + // `XSAVE_FAM_LENGTH` accordingly. + #[cfg(feature = "kvm")] + { + // Obtain the number of bytes the kvm_xsave struct requires. + // This number is documented to always be at least 4096 bytes, but + let size = hypervisor.check_extension_int(kvm_ioctls::Cap::Xsave2); + // Reality check: We should at least have this number of bytes and probably more as we have enabled + // AMX tiles. If this is not the case, it is probably best to panic. + assert!(size >= 4096); + let fam_length = { + // Computation is documented in `[kvm_bindings::kvm_xsave2::len]` + ((size as usize) - size_of::()) + .div_ceil(size_of::()) + }; + XSAVE_FAM_LENGTH + .set(fam_length) + .expect("This should only be set once"); + } + Ok(()) } @@ -420,10 +465,3 @@ impl XsaveState { } } } - -impl Default for XsaveState { - fn default() -> Self { - // SAFETY: this is plain old data structure - unsafe { ::std::mem::zeroed() } - } -} diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 886d4dc4d7..eea499da51 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -3004,13 +3004,11 @@ impl KvmVcpu { /// X86 specific call that returns the vcpu's current "xsave struct". /// fn get_xsave(&self) -> cpu::Result { - Ok(self - .fd - .lock() - .unwrap() - .get_xsave() - .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))? - .into()) + XsaveState::with_initializer(|state| + // SAFETY: Any configured dynamically enabled state components are always enabled via + // static methods on `XsaveState` hence we know that `state` has the expected size. + unsafe { self.fd.lock().unwrap().get_xsave2(state) }) + .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(anyhow::Error::from_boxed(e))) } #[cfg(target_arch = "x86_64")] @@ -3018,16 +3016,11 @@ impl KvmVcpu { /// X86 specific call that sets the vcpu's current "xsave struct". 
/// fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> { - let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into(); - // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct - // when calling the kvm-ioctl library function. - unsafe { - self.fd - .lock() - .unwrap() - .set_xsave(&xsave) - .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into())) - } + // SAFETY: Any configured dynamically enabled state components are always enabled via + // static methods on `XsaveState` hence we know that the wrapped instance has the + // expected size. + unsafe { self.fd.lock().unwrap().set_xsave2(&xsave.0) } + .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into())) } #[cfg(target_arch = "x86_64")] diff --git a/hypervisor/src/kvm/x86_64/mod.rs b/hypervisor/src/kvm/x86_64/mod.rs index c1bda9d9be..2efd12b0e4 100644 --- a/hypervisor/src/kvm/x86_64/mod.rs +++ b/hypervisor/src/kvm/x86_64/mod.rs @@ -291,18 +291,3 @@ impl From for kvm_msr_entry { } } } - -impl From for XsaveState { - fn from(s: kvm_xsave) -> Self { - Self { region: s.region } - } -} - -impl From for kvm_xsave { - fn from(s: XsaveState) -> Self { - Self { - region: s.region, - extra: Default::default(), - } - } -} From 78f1e533914d9371565794f0d4e8d202244666f4 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Mon, 1 Dec 2025 14:10:28 +0100 Subject: [PATCH 264/294] build: don't strip binary With debug symbols, we will get better backtraces and can improve our experience debugging. The only downside is larger binary size which is negligible in our case. There are no implications for the performance. Stripped: 3.9M Unstripped: 4.7M Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c948bca3f3..01e904b40a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,6 @@ rust-version = "1.88.0" codegen-units = 1 lto = true opt-level = "s" -strip = true # Tradeof between performance and fast compilation times for local testing and # development with frequent rebuilds. From 4ad0fc65ade321213a94fcb3d5956995cd74bfbf Mon Sep 17 00:00:00 2001 From: Songqian Li Date: Wed, 12 Nov 2025 20:07:06 +0800 Subject: [PATCH 265/294] seccomp: add arch_prctl syscall for amx When enabling amx feature, we should call arch_prctl to request permission to use tile data for guest. The permission should be requested before the first vcpu is created, so we need to call arch_prctl in vmm thread. This patch adds the arch_prctl syscall for vmm_thread_rules. Fixes: #7516 Signed-off-by: Songqian Li --- vmm/src/seccomp_filters.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index f0bc1e1ef4..bd1ce491d1 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -550,6 +550,8 @@ fn vmm_thread_rules( (libc::SYS_accept4, vec![]), #[cfg(target_arch = "x86_64")] (libc::SYS_access, vec![]), + #[cfg(target_arch = "x86_64")] + (libc::SYS_arch_prctl, vec![]), (libc::SYS_bind, vec![]), (libc::SYS_brk, vec![]), (libc::SYS_clock_gettime, vec![]), From 127f2d8f1ebfc43da697fa17a9f5f9457699fb23 Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Wed, 3 Dec 2025 00:57:15 +0100 Subject: [PATCH 266/294] seccomp: Replace KVM_GET_XSAVE with KVM_GET_XSAVE2 As we have replaced all KVM_GET_XSAVE calls with KVM_GET_XSAVE2 we need to update the seccomp filters accordingly. 
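For reference, the new constant can be cross-checked against the standard Linux ioctl encoding (direction in bits 31:30, payload size in bits 29:16, the KVMIO magic 0xAE in bits 15:8, command number in bits 7:0), assuming `KVM_GET_XSAVE2` is `_IOR(KVMIO, 0xcf, struct kvm_xsave)` as in the kernel headers:

```rust
// Rebuild both ioctl numbers from their parts and compare with the seccomp constants.
const IOC_READ: u64 = 2; // _IOC_READ
const KVMIO: u64 = 0xAE;

const fn ior(nr: u64, size: u64) -> u64 {
    (IOC_READ << 30) | (size << 16) | (KVMIO << 8) | nr
}

fn main() {
    // struct kvm_xsave has a fixed 4096-byte header, so the encoded size is 0x1000.
    assert_eq!(ior(0xcf, 0x1000), 0x9000_aecf); // KVM_GET_XSAVE2
    assert_eq!(ior(0xa4, 0x1000), 0x9000_aea4); // the old KVM_GET_XSAVE
}
```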
Signed-Off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- vmm/src/seccomp_filters.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index bd1ce491d1..44ba48d293 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -376,7 +376,7 @@ fn create_vmm_ioctl_seccomp_rule_kvm() -> Result, BackendError> const KVM_GET_SREGS: u64 = 0x8138_ae83; const KVM_GET_TSC_KHZ: u64 = 0xaea3; const KVM_GET_XCRS: u64 = 0x8188_aea6; - const KVM_GET_XSAVE: u64 = 0x9000_aea4; + const KVM_GET_XSAVE2: u64 = 0x9000_aecf; const KVM_KVMCLOCK_CTRL: u64 = 0xaead; const KVM_SET_CLOCK: u64 = 0x4030_ae7b; const KVM_SET_CPUID2: u64 = 0x4008_ae90; @@ -404,7 +404,7 @@ fn create_vmm_ioctl_seccomp_rule_kvm() -> Result, BackendError> and![Cond::new(1, ArgLen::Dword, Eq, KVM_GET_SREGS)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_GET_TSC_KHZ)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_GET_XCRS,)?], - and![Cond::new(1, ArgLen::Dword, Eq, KVM_GET_XSAVE,)?], + and![Cond::new(1, ArgLen::Dword, Eq, KVM_GET_XSAVE2,)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_KVMCLOCK_CTRL)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_CLOCK)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_CPUID2)?], From b23e6827fcc02525ecf2e26d84ab0e55e8837383 Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Tue, 25 Nov 2025 16:28:30 +0100 Subject: [PATCH 267/294] vmm: Write directly to member when creating new devices Virtio PCI devices are created in a set of nested functions. In each of these functions a vector is created to collect the newly created devices, only to be appended to the vector of the next higher nesting level. These nested vectors are unnecessary: the devices can be pushed directly to the `virtio_devices` member of the `DeviceManager`. Signed-off-by: Pascal Scholz On-behalf-of: SAP pascal.scholz@sap.com --- vmm/src/device_manager.rs | 146 +++++++++++++++----------------------- 1 file changed, 58 insertions(+), 88 deletions(-) diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index fa409a27e8..6e712378f9 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -1406,8 +1406,6 @@ impl DeviceManager { ) -> DeviceManagerResult<()> { trace_scoped!("create_devices"); - let mut virtio_devices: Vec = Vec::new(); - self.cpu_manager .lock() .unwrap() @@ -1458,12 +1456,8 @@ impl DeviceManager { self.original_termios_opt = original_termios_opt; - self.console = self.add_console_devices( - &legacy_interrupt_manager, - &mut virtio_devices, - console_info, - console_resize_pipe, - )?; + self.console = + self.add_console_devices(&legacy_interrupt_manager, console_info, console_resize_pipe)?; #[cfg(not(target_arch = "riscv64"))] if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() { @@ -1473,11 +1467,8 @@ } self.legacy_interrupt_manager = Some(legacy_interrupt_manager); - virtio_devices.append(&mut self.make_virtio_devices()?); - - self.add_pci_devices(virtio_devices.clone())?; - - self.virtio_devices = virtio_devices; + self.make_virtio_devices()?; + self.add_pci_devices()?; // Add pvmemcontrol if required #[cfg(feature = "pvmemcontrol")] @@ -1586,10 +1577,7 @@ } #[allow(unused_variables)] - fn add_pci_devices( - &mut self, - virtio_devices: Vec, - ) -> DeviceManagerResult<()> { + fn add_pci_devices(&mut self) -> DeviceManagerResult<()> { let iommu_id = String::from(IOMMU_DEVICE_NAME); let iommu_address_width_bits = @@ -1631,7 +1619,7 @@ let mut iommu_attached_devices = Vec::new(); { - for handle in
virtio_devices { + for handle in self.virtio_devices.clone() { let mapping: Option> = if handle.iommu { self.iommu_mapping.clone() } else { @@ -2319,7 +2307,6 @@ impl DeviceManager { fn add_virtio_console_device( &mut self, - virtio_devices: &mut Vec, console_fd: ConsoleOutput, resize_pipe: Option>, ) -> DeviceManagerResult>> { @@ -2378,7 +2365,7 @@ impl DeviceManager { ) .map_err(DeviceManagerError::CreateVirtioConsole)?; let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); - virtio_devices.push(MetaVirtioDevice { + self.virtio_devices.push(MetaVirtioDevice { virtio_device: Arc::clone(&virtio_console_device) as Arc>, iommu: console_config.iommu, @@ -2411,7 +2398,6 @@ impl DeviceManager { fn add_console_devices( &mut self, interrupt_manager: &Arc>, - virtio_devices: &mut Vec, console_info: Option, console_resize_pipe: Option>, ) -> DeviceManagerResult> { @@ -2480,11 +2466,8 @@ impl DeviceManager { } } - let console_resizer = self.add_virtio_console_device( - virtio_devices, - console_info.console_main_fd, - console_resize_pipe, - )?; + let console_resizer = + self.add_virtio_console_device(console_info.console_main_fd, console_resize_pipe)?; Ok(Arc::new(Console { console_resizer })) } @@ -2542,35 +2525,33 @@ impl DeviceManager { Ok(()) } - fn make_virtio_devices(&mut self) -> DeviceManagerResult> { - let mut devices: Vec = Vec::new(); - + fn make_virtio_devices(&mut self) -> DeviceManagerResult<()> { // Create "standard" virtio devices (net/block/rng) - devices.append(&mut self.make_virtio_block_devices()?); - devices.append(&mut self.make_virtio_net_devices()?); - devices.append(&mut self.make_virtio_rng_devices()?); + self.make_virtio_block_devices()?; + self.make_virtio_net_devices()?; + self.make_virtio_rng_devices()?; // Add virtio-fs if required - devices.append(&mut self.make_virtio_fs_devices()?); + self.make_virtio_fs_devices()?; // Add virtio-pmem if required - devices.append(&mut self.make_virtio_pmem_devices()?); + self.make_virtio_pmem_devices()?; // Add virtio-vsock if required - devices.append(&mut self.make_virtio_vsock_devices()?); + self.make_virtio_vsock_devices()?; - devices.append(&mut self.make_virtio_mem_devices()?); + self.make_virtio_mem_devices()?; // Add virtio-balloon if required - devices.append(&mut self.make_virtio_balloon_devices()?); + self.make_virtio_balloon_devices()?; // Add virtio-watchdog device - devices.append(&mut self.make_virtio_watchdog_devices()?); + self.make_virtio_watchdog_devices()?; // Add vDPA devices if required - devices.append(&mut self.make_vdpa_devices()?); + self.make_vdpa_devices()?; - Ok(devices) + Ok(()) } // Cache whether aio is supported to avoid checking for very block device @@ -2841,18 +2822,17 @@ impl DeviceManager { }) } - fn make_virtio_block_devices(&mut self) -> DeviceManagerResult> { - let mut devices = Vec::new(); - + fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<()> { let mut block_devices = self.config.lock().unwrap().disks.clone(); if let Some(disk_list_cfg) = &mut block_devices { for disk_cfg in disk_list_cfg.iter_mut() { - devices.push(self.make_virtio_block_device(disk_cfg, false)?); + let device = self.make_virtio_block_device(disk_cfg, false)?; + self.virtio_devices.push(device); } } self.config.lock().unwrap().disks = block_devices; - Ok(devices) + Ok(()) } fn make_virtio_net_device( @@ -3019,22 +2999,20 @@ impl DeviceManager { } /// Add virto-net and vhost-user-net devices - fn make_virtio_net_devices(&mut self) -> DeviceManagerResult> { - let mut devices = Vec::new(); + 
fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<()> { let mut net_devices = self.config.lock().unwrap().net.clone(); if let Some(net_list_cfg) = &mut net_devices { for net_cfg in net_list_cfg.iter_mut() { - devices.push(self.make_virtio_net_device(net_cfg)?); + let device = self.make_virtio_net_device(net_cfg)?; + self.virtio_devices.push(device); } } self.config.lock().unwrap().net = net_devices; - Ok(devices) + Ok(()) } - fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult> { - let mut devices = Vec::new(); - + fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<()> { // Add virtio-rng if required let rng_config = self.config.lock().unwrap().rng.clone(); if let Some(rng_path) = rng_config.src.to_str() { @@ -3055,7 +3033,7 @@ impl DeviceManager { ) .map_err(DeviceManagerError::CreateVirtioRng)?, )); - devices.push(MetaVirtioDevice { + self.virtio_devices.push(MetaVirtioDevice { virtio_device: Arc::clone(&virtio_rng_device) as Arc>, iommu: rng_config.iommu, @@ -3073,7 +3051,7 @@ impl DeviceManager { .insert(id.clone(), device_node!(id, virtio_rng_device)); } - Ok(devices) + Ok(()) } fn make_virtio_fs_device( @@ -3129,18 +3107,17 @@ impl DeviceManager { } } - fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult> { - let mut devices = Vec::new(); - + fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<()> { let mut fs_devices = self.config.lock().unwrap().fs.clone(); if let Some(fs_list_cfg) = &mut fs_devices { for fs_cfg in fs_list_cfg.iter_mut() { - devices.push(self.make_virtio_fs_device(fs_cfg)?); + let device = self.make_virtio_fs_device(fs_cfg)?; + self.virtio_devices.push(device); } } self.config.lock().unwrap().fs = fs_devices; - Ok(devices) + Ok(()) } fn make_virtio_pmem_device( @@ -3315,18 +3292,18 @@ impl DeviceManager { }) } - fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult> { - let mut devices = Vec::new(); + fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<()> { // Add virtio-pmem if required let mut pmem_devices = self.config.lock().unwrap().pmem.clone(); if let Some(pmem_list_cfg) = &mut pmem_devices { for pmem_cfg in pmem_list_cfg.iter_mut() { - devices.push(self.make_virtio_pmem_device(pmem_cfg)?); + let device = self.make_virtio_pmem_device(pmem_cfg)?; + self.virtio_devices.push(device); } } self.config.lock().unwrap().pmem = pmem_devices; - Ok(devices) + Ok(()) } fn make_virtio_vsock_device( @@ -3386,21 +3363,18 @@ impl DeviceManager { }) } - fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult> { - let mut devices = Vec::new(); - + fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<()> { let mut vsock = self.config.lock().unwrap().vsock.clone(); if let Some(vsock_cfg) = &mut vsock { - devices.push(self.make_virtio_vsock_device(vsock_cfg)?); + let device = self.make_virtio_vsock_device(vsock_cfg)?; + self.virtio_devices.push(device); } self.config.lock().unwrap().vsock = vsock; - Ok(devices) + Ok(()) } - fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult> { - let mut devices = Vec::new(); - + fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<()> { let mm = self.memory_manager.clone(); let mut mm = mm.lock().unwrap(); for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() { @@ -3435,7 +3409,7 @@ impl DeviceManager { self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); - devices.push(MetaVirtioDevice { + self.virtio_devices.push(MetaVirtioDevice { virtio_device: Arc::clone(&virtio_mem_device) as Arc>, iommu: false, @@ -3454,7 +3428,7 @@ 
impl DeviceManager { } } - Ok(devices) + Ok(()) } #[cfg(feature = "pvmemcontrol")] @@ -3499,9 +3473,7 @@ impl DeviceManager { Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device)) } - fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult> { - let mut devices = Vec::new(); - + fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<()> { if let Some(balloon_config) = &self.config.lock().unwrap().balloon { let id = String::from(BALLOON_DEVICE_NAME); info!("Creating virtio-balloon device: id = {}", id); @@ -3524,7 +3496,7 @@ impl DeviceManager { self.balloon = Some(virtio_balloon_device.clone()); - devices.push(MetaVirtioDevice { + self.virtio_devices.push(MetaVirtioDevice { virtio_device: Arc::clone(&virtio_balloon_device) as Arc>, iommu: false, @@ -3539,14 +3511,12 @@ impl DeviceManager { .insert(id.clone(), device_node!(id, virtio_balloon_device)); } - Ok(devices) + Ok(()) } - fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult> { - let mut devices = Vec::new(); - + fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<()> { if !self.config.lock().unwrap().watchdog { - return Ok(devices); + return Ok(()); } let id = String::from(WATCHDOG_DEVICE_NAME); @@ -3565,7 +3535,7 @@ impl DeviceManager { ) .map_err(DeviceManagerError::CreateVirtioWatchdog)?, )); - devices.push(MetaVirtioDevice { + self.virtio_devices.push(MetaVirtioDevice { virtio_device: Arc::clone(&virtio_watchdog_device) as Arc>, iommu: false, @@ -3579,7 +3549,7 @@ impl DeviceManager { .unwrap() .insert(id.clone(), device_node!(id, virtio_watchdog_device)); - Ok(devices) + Ok(()) } fn make_vdpa_device( @@ -3633,18 +3603,18 @@ impl DeviceManager { }) } - fn make_vdpa_devices(&mut self) -> DeviceManagerResult> { - let mut devices = Vec::new(); + fn make_vdpa_devices(&mut self) -> DeviceManagerResult<()> { // Add vdpa if required let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone(); if let Some(vdpa_list_cfg) = &mut vdpa_devices { for vdpa_cfg in vdpa_list_cfg.iter_mut() { - devices.push(self.make_vdpa_device(vdpa_cfg)?); + let device = self.make_vdpa_device(vdpa_cfg)?; + self.virtio_devices.push(device); } } self.config.lock().unwrap().vdpa = vdpa_devices; - Ok(devices) + Ok(()) } fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult { From d943de4597ff2ae762649a4027bdab3dd20b6b63 Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Wed, 3 Dec 2025 16:32:37 +0100 Subject: [PATCH 268/294] pci: Allow for device ID allocation on a bus Allocating a device ID is crucial for assigning a specific ID to a device. We need this to implement configurable PCI BDF. Signed-off-by: Pascal Scholz On-behalf-of: SAP pascal.scholz@sap.com --- pci/src/bus.rs | 150 ++++++++++++++++++++++++++++++++++++-- vmm/src/device_manager.rs | 5 +- vmm/src/pci_segment.rs | 13 +++- 3 files changed, 155 insertions(+), 13 deletions(-) diff --git a/pci/src/bus.rs b/pci/src/bus.rs index fd19321de5..5b10788ede 100644 --- a/pci/src/bus.rs +++ b/pci/src/bus.rs @@ -45,7 +45,7 @@ pub enum PciRootError { #[error("Invalid PCI device identifier provided")] InvalidPciDeviceSlot(usize), /// Valid PCI device identifier but already used. 
- #[error("Valid PCI device identifier but already used")] + #[error("Valid PCI device identifier but already used: {0}")] AlreadyInUsePciDeviceSlot(usize), } pub type Result = std::result::Result; @@ -166,15 +166,42 @@ impl PciBus { Ok(()) } - pub fn next_device_id(&mut self) -> Result { - for (idx, device_id) in self.device_ids.iter_mut().enumerate() { - if !(*device_id) { - *device_id = true; - return Ok(idx as u32); + /// Allocates a PCI device ID on the bus. + /// + /// - `id`: ID to allocate on the bus. If [`None`], the next free + /// device ID on the bus is allocated, else the ID given is + /// allocated + /// + /// ## Errors + /// * Returns [`PciRootError::AlreadyInUsePciDeviceSlot`] in case + /// the ID requested is already allocated. + /// * Returns [`PciRootError::InvalidPciDeviceSlot`] in case the + /// requested ID exceeds the maximum number of devices allowed per + /// bus (see [`NUM_DEVICE_IDS`]). + /// * If `id` is [`None`]: Returns + /// [`PciRootError::NoPciDeviceSlotAvailable`] if no free device + /// slot is available on the bus. + pub fn allocate_device_id(&mut self, id: Option) -> Result { + if let Some(id) = id { + if (id as usize) < NUM_DEVICE_IDS { + if !self.device_ids[id as usize] { + self.device_ids[id as usize] = true; + Ok(id as u32) + } else { + Err(PciRootError::AlreadyInUsePciDeviceSlot(id as usize)) + } + } else { + Err(PciRootError::InvalidPciDeviceSlot(id as usize)) + } + } else { + for (idx, device_id) in self.device_ids.iter_mut().enumerate() { + if !(*device_id) { + *device_id = true; + return Ok(idx as u32); + } } + Err(PciRootError::NoPciDeviceSlotAvailable) } - - Err(PciRootError::NoPciDeviceSlotAvailable) } pub fn get_device_id(&mut self, id: usize) -> Result<()> { @@ -484,3 +511,110 @@ fn parse_io_config_address(config_address: u32) -> (usize, usize, usize, usize) shift_and_mask(config_address, REGISTER_NUMBER_OFFSET, REGISTER_NUMBER_MASK), ) } + +#[cfg(test)] +mod unit_tests { + use std::error::Error; + use std::result::Result; + + use super::*; + + #[derive(Debug)] + struct MocRelocDevice; + + impl DeviceRelocation for MocRelocDevice { + fn move_bar( + &self, + _old_base: u64, + _new_base: u64, + _len: u64, + _pci_dev: &mut dyn PciDevice, + _region_type: PciBarRegionType, + ) -> Result<(), std::io::Error> { + Ok(()) + } + } + + fn setup_bus() -> PciBus { + let pci_root = PciRoot::new(None); + let moc_device_reloc = Arc::new(MocRelocDevice {}); + PciBus::new(pci_root, moc_device_reloc) + } + + #[test] + // Test to acquire all IDs that can be acquired + fn allocate_device_id_next_free() { + // The first address is occupied by the root + let mut bus = setup_bus(); + for expected_id in 1..NUM_DEVICE_IDS { + assert_eq!(expected_id as u32, bus.allocate_device_id(None).unwrap()); + } + } + + #[test] + // Test that requesting specific ID work + fn allocate_device_id_request_id() -> Result<(), Box> { + // The first address is occupied by the root + let mut bus = setup_bus(); + let max_id = (NUM_DEVICE_IDS - 1).try_into()?; + assert_eq!(0x01_u32, bus.allocate_device_id(Some(0x01))?); + assert_eq!(0x10_u32, bus.allocate_device_id(Some(0x10))?); + assert_eq!(max_id as u32, bus.allocate_device_id(Some(max_id))?); + Ok(()) + } + + #[test] + // Test that gaps resulting from explicit allocations are filled by implicit ones, + // beginning with the first free slot + fn allocate_device_id_fills_gaps() -> Result<(), Box> { + // The first address is occupied by the root + let mut bus = setup_bus(); + assert_eq!(0x01_u32, bus.allocate_device_id(Some(0x01))?); + 
assert_eq!(0x03_u32, bus.allocate_device_id(Some(0x03))?); + assert_eq!(0x06_u32, bus.allocate_device_id(Some(0x06))?); + assert_eq!(0x02_u32, bus.allocate_device_id(None)?); + assert_eq!(0x04_u32, bus.allocate_device_id(None)?); + assert_eq!(0x05_u32, bus.allocate_device_id(None)?); + assert_eq!(0x07_u32, bus.allocate_device_id(None)?); + Ok(()) + } + + #[test] + // Test that requesting the same ID twice fails + fn allocate_device_id_request_id_twice_fails() -> Result<(), Box> { + let mut bus = setup_bus(); + let max_id = (NUM_DEVICE_IDS - 1).try_into()?; + bus.allocate_device_id(Some(max_id))?; + let _result = bus.allocate_device_id(Some(max_id)); + assert!(matches!( + PciRootError::AlreadyInUsePciDeviceSlot(max_id.into()), + _result + )); + Ok(()) + } + + #[test] + // Test to request an invalid ID + fn allocate_device_id_request_invalid_id_fails() -> Result<(), Box> { + let mut bus = setup_bus(); + let max_id = (NUM_DEVICE_IDS + 1).try_into()?; + let _result = bus.allocate_device_id(Some(max_id)); + assert!(matches!( + PciRootError::InvalidPciDeviceSlot(max_id.into()), + _result + )); + Ok(()) + } + + #[test] + // Test to acquire an ID when all IDs were already acquired + fn allocate_device_id_none_left() { + // The first address is occupied by the root + let mut bus = setup_bus(); + for expected_id in 1..NUM_DEVICE_IDS { + assert_eq!(expected_id as u32, bus.allocate_device_id(None).unwrap()); + } + let _result = bus.allocate_device_id(None); + assert!(matches!(PciRootError::NoPciDeviceSlotAvailable, _result)); + } +} diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 6e712378f9..87faefcca2 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -467,7 +467,7 @@ pub enum DeviceManagerError { /// Failed to find an available PCI device ID. #[error("Failed to find an available PCI device ID")] - NextPciDeviceId(#[source] pci::PciRootError), + AllocatePciDeviceId(#[source] pci::PciRootError), /// Could not reserve the PCI device ID. #[error("Could not reserve the PCI device ID")] @@ -4289,7 +4289,8 @@ impl DeviceManager { (pci_segment_id, pci_device_bdf, resources) } else { - let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?; + let pci_device_bdf = + self.pci_segments[pci_segment_id as usize].allocate_device_bdf(None)?; (pci_segment_id, pci_device_bdf, None) }) diff --git a/vmm/src/pci_segment.rs b/vmm/src/pci_segment.rs index 345869c1da..7477020e52 100644 --- a/vmm/src/pci_segment.rs +++ b/vmm/src/pci_segment.rs @@ -163,15 +163,22 @@ impl PciSegment { ) } - pub(crate) fn next_device_bdf(&self) -> DeviceManagerResult { + /// Allocates a device BDF on this PCI segment + /// + /// - `device_id`: Device ID to request for BDF allocation + /// + /// ## Errors + /// * [`DeviceManagerError::AllocatePciDeviceId`] if device ID + /// allocation on the bus fails. + pub(crate) fn allocate_device_bdf(&self, device_id: Option) -> DeviceManagerResult { Ok(PciBdf::new( self.id, 0, self.pci_bus .lock() .unwrap() - .next_device_id() - .map_err(DeviceManagerError::NextPciDeviceId)? as u8, + .allocate_device_id(device_id) + .map_err(DeviceManagerError::AllocatePciDeviceId)? as u8, 0, )) } From 472f2b7e53ea561aa7fbfc2303cf6878acd5b562 Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Thu, 4 Dec 2025 15:22:48 +0100 Subject: [PATCH 269/294] vmm: Add tests for `allocate_device_bdf` in `PciSegment` Next to tests for `allocate_device_bdf`, we introduce a new constructor `new_without_address_manager`, only available in the test build. 
As there is no way to instantiate an `AddressManager` in the tests, we use this constructor to work around this. Signed-off-by: Pascal Scholz On-behalf-of: SAP pascal.scholz@sap.com --- vmm/src/pci_segment.rs | 154 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 153 insertions(+), 1 deletion(-) diff --git a/vmm/src/pci_segment.rs b/vmm/src/pci_segment.rs index 7477020e52..289ae853c7 100644 --- a/vmm/src/pci_segment.rs +++ b/vmm/src/pci_segment.rs @@ -163,7 +163,7 @@ impl PciSegment { ) } - /// Allocates a device BDF on this PCI segment + /// Allocates a device BDF on this PCI segment. /// /// - `device_id`: Device ID to request for BDF allocation /// @@ -208,6 +208,65 @@ impl PciSegment { Ok(()) } + + #[cfg(test)] + /// Creates a PciSegment without the need for an [`AddressManager`] + /// for testing purpose. + /// + /// An [`AddressManager`] would otherwise be required to create + /// [`PciBus`] instances. Instead, we use any struct that implements + /// [`DeviceRelocation`] to instantiate a [`PciBus`]. + pub(crate) fn new_without_address_manager( + id: u16, + numa_node: u32, + mem32_allocator: Arc>, + mem64_allocator: Arc>, + pci_irq_slots: &[u8; 32], + device_reloc: Arc, + ) -> DeviceManagerResult { + let pci_root = PciRoot::new(None); + let pci_bus = Arc::new(Mutex::new(PciBus::new(pci_root, device_reloc.clone()))); + + let pci_config_mmio = Arc::new(Mutex::new(PciConfigMmio::new(Arc::clone(&pci_bus)))); + let mmio_config_address = + layout::PCI_MMCONFIG_START.0 + layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT * id as u64; + + let start_of_mem32_area = mem32_allocator.lock().unwrap().base().0; + let end_of_mem32_area = mem32_allocator.lock().unwrap().end().0; + + let start_of_mem64_area = mem64_allocator.lock().unwrap().base().0; + let end_of_mem64_area = mem64_allocator.lock().unwrap().end().0; + + let segment = PciSegment { + id, + pci_bus, + pci_config_mmio, + mmio_config_address, + proximity_domain: numa_node, + pci_devices_up: 0, + pci_devices_down: 0, + #[cfg(target_arch = "x86_64")] + pci_config_io: None, + mem32_allocator, + mem64_allocator, + start_of_mem32_area, + end_of_mem32_area, + start_of_mem64_area, + end_of_mem64_area, + pci_irq_slots: *pci_irq_slots, + }; + + info!( + "Adding PCI segment: id={}, PCI MMIO config address: 0x{:x}, mem32 area [0x{:x}-0x{:x}, mem64 area [0x{:x}-0x{:x}", + segment.id, + segment.mmio_config_address, + segment.start_of_mem32_area, + segment.end_of_mem32_area, + segment.start_of_mem64_area, + segment.end_of_mem64_area + ); + Ok(segment) + } } struct PciDevSlot { @@ -480,3 +539,96 @@ impl Aml for PciSegment { .to_aml_bytes(sink) } } + +#[cfg(test)] +mod unit_tests { + use std::result::Result; + + use vm_memory::GuestAddress; + + use super::*; + + #[derive(Debug)] + struct MocRelocDevice; + impl DeviceRelocation for MocRelocDevice { + fn move_bar( + &self, + _old_base: u64, + _new_base: u64, + _len: u64, + _pci_dev: &mut dyn pci::PciDevice, + _region_type: pci::PciBarRegionType, + ) -> Result<(), std::io::Error> { + Ok(()) + } + } + + fn setup() -> PciSegment { + let guest_addr = 0_u64; + let guest_size = 0x1000_usize; + let allocator_1 = Arc::new(Mutex::new( + AddressAllocator::new(GuestAddress(guest_addr), guest_size as u64).unwrap(), + )); + let allocator_2 = Arc::new(Mutex::new( + AddressAllocator::new(GuestAddress(guest_addr), guest_size as u64).unwrap(), + )); + let moc_device_reloc = Arc::new(MocRelocDevice {}); + let arr = [0_u8; 32]; + + PciSegment::new_without_address_manager( + 0, + 0, + allocator_1, + allocator_2, + &arr, + 
moc_device_reloc, + ) + .unwrap() + } + + #[test] + // Test the default bdf for a segment with an empty bus (except for the root device) + fn allocate_device_bdf_default() { + // The first address is occupied by the root + let segment = setup(); + let bdf = segment.allocate_device_bdf(None).unwrap(); + assert_eq!(bdf.segment(), segment.id); + assert_eq!(bdf.bus(), 0); + assert_eq!(bdf.device(), 1); + assert_eq!(bdf.function(), 0); + } + + #[test] + // Test to acquire a bdf with s specific device ID + fn allocate_device_bdf_fixed_device_id() { + // The first address is occupied by the root + let expect_device_id = 0x10_u8; + let segment = setup(); + let bdf = segment.allocate_device_bdf(Some(expect_device_id)).unwrap(); + assert_eq!(bdf.segment(), segment.id); + assert_eq!(bdf.bus(), 0); + assert_eq!(bdf.device(), expect_device_id); + assert_eq!(bdf.function(), 0); + } + + #[test] + // Test to acquire a bdf with invalid device id, one already + // taken and the other being greater then the number of allowed + // devices per bus. + fn allocate_device_bdf_invalid_device_id() { + // The first address is occupied by the root + let already_taken_device_id = 0x0_u8; + let overflow_device_id = 0xff_u8; + let segment = setup(); + let bdf_res = segment.allocate_device_bdf(Some(already_taken_device_id)); + assert!(matches!( + bdf_res, + Err(DeviceManagerError::AllocatePciDeviceId(_)) + )); + let bdf_res = segment.allocate_device_bdf(Some(overflow_device_id)); + assert!(matches!( + bdf_res, + Err(DeviceManagerError::AllocatePciDeviceId(_)) + )); + } +} From b93424f2f7b7aeefd9833f415934a0f8a992c3e9 Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Fri, 5 Dec 2025 08:27:10 +0100 Subject: [PATCH 270/294] vmm: Introduce `addr` argument to PCI device configs and update parser Updates all config structs in order to make the new config option available to all PCI device. Additionally update the parser so the new option becomes available on the CLI. Signed-off-by: Pascal Scholz On-behalf-of: SAP pascal.scholz@sap.com --- option_parser/src/lib.rs | 37 ++++++++++++ src/main.rs | 3 + vmm/src/config.rs | 118 ++++++++++++++++++++++++++++++++------ vmm/src/device_manager.rs | 33 ++++++++--- vmm/src/lib.rs | 3 + vmm/src/vm_config.rs | 22 +++++++ 6 files changed, 190 insertions(+), 26 deletions(-) diff --git a/option_parser/src/lib.rs b/option_parser/src/lib.rs index ff3d9ffffd..8f6760b5ab 100644 --- a/option_parser/src/lib.rs +++ b/option_parser/src/lib.rs @@ -46,6 +46,8 @@ pub enum OptionParserError { Conversion(String /* field */, String /* value */), #[error("invalid value: {0}")] InvalidValue(String), + #[error("failed to convert {1}")] + NumberConversion(#[source] ParseIntError, String), } type OptionParserResult = std::result::Result; @@ -167,6 +169,41 @@ impl OptionParser { .is_some() } + /// Parses the `addr` option of PCI devices and returns the PCI device as well as the function ID + /// + /// Returns a tuple consisting of the parsed IDs for device and function in this order. Returns an error if the + /// supplied `addr` values cannot be parsed to [`u8`]. The tuple might consist of two times [`None`] if `addr` was + /// not provided. + pub fn get_pci_device_function( + &self, + ) -> OptionParserResult<(Option, Option)> { + if let Some(addr_str) = self.get("addr") { + let (device_str, function_str) = addr_str + .split_once('.') + .ok_or(OptionParserError::InvalidValue(addr_str.to_owned()))?; + + // We also accept hex number with `0x` prefix, but need to strip it before conversion in case it's present. 
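+ // For example, both `addr=0x12.0` and `addr=12.0` are accepted here and parse to device 0x12, function 0.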
+ let device_str = device_str.strip_prefix("0x").unwrap_or(device_str); + let device_id = u8::from_str_radix(device_str, 16) + .map_err(|e| OptionParserError::NumberConversion(e, addr_str.to_owned()))?; + + let function_str = function_str.strip_prefix("0x").unwrap_or(function_str); + let function_id = u8::from_str_radix(function_str, 16) + .map_err(|e| OptionParserError::NumberConversion(e, addr_str.to_owned()))?; + + // Currently CHV only support single-function devices. Those are mapped to function ID 0 in all cases, so we + // disallow the assignment of any other function ID. + if function_id != 0 { + todo!( + "Currently no multi function devices supported! Please use `0` as function ID." + ); + } + Ok((Some(device_id), Some(function_id))) + } else { + Ok((None, None)) + } + } + pub fn convert(&self, option: &str) -> OptionParserResult> { match self.options.get(option).and_then(|v| v.value.as_ref()) { None => Ok(None), diff --git a/src/main.rs b/src/main.rs index d096f28cb1..97246e0126 100644 --- a/src/main.rs +++ b/src/main.rs @@ -993,6 +993,7 @@ mod unit_tests { rng: RngConfig { src: PathBuf::from("/dev/urandom"), iommu: false, + bdf_device: None, }, balloon: None, fs: None, @@ -1003,6 +1004,7 @@ mod unit_tests { iommu: false, socket: None, url: None, + bdf_device: None, }, console: ConsoleConfig { file: None, @@ -1010,6 +1012,7 @@ mod unit_tests { iommu: false, socket: None, url: None, + bdf_device: None, }, #[cfg(target_arch = "x86_64")] debug_console: DebugConsoleConfig::default(), diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 00d26ec881..1b37f63328 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -175,6 +175,9 @@ pub enum Error { /// Failed Parsing FwCfgItem config #[error("Error parsing --fw-cfg-config items")] ParseFwCfgItem(#[source] OptionParserError), + /// Failed parsing addr option + #[error("Error parsing --addr")] + ParsePciAddr(#[source] OptionParserError), } #[derive(Debug, PartialEq, Eq, Error)] @@ -1077,7 +1080,7 @@ impl DiskConfig { ops_size=,ops_one_time_burst=,ops_refill_time=,\ id=,pci_segment=,rate_limit_group=,\ queue_affinity=,\ - serial="; + serial=,addr="; pub fn parse(disk: &str) -> Result { let mut parser = OptionParser::new(); @@ -1102,7 +1105,8 @@ impl DiskConfig { .add("pci_segment") .add("serial") .add("rate_limit_group") - .add("queue_affinity"); + .add("queue_affinity") + .add("addr"); parser.parse(disk).map_err(Error::ParseDisk)?; let path = parser.get("path").map(PathBuf::from); @@ -1214,6 +1218,10 @@ impl DiskConfig { None }; + let (bdf_device, _bdf_function) = parser + .get_pci_device_function() + .map_err(Error::ParsePciAddr)?; + Ok(DiskConfig { path, readonly, @@ -1231,6 +1239,7 @@ impl DiskConfig { pci_segment, serial, queue_affinity, + bdf_device, }) } @@ -1302,7 +1311,7 @@ impl NetConfig { vhost_user=,socket=,vhost_mode=client|server,\ bw_size=,bw_one_time_burst=,bw_refill_time=,\ ops_size=,ops_one_time_burst=,ops_refill_time=,pci_segment=\ - offload_tso=on|off,offload_ufo=on|off,offload_csum=on|off\""; + offload_tso=on|off,offload_ufo=on|off,offload_csum=on|off,addr=DD.F\""; pub fn parse(net: &str) -> Result { let mut parser = OptionParser::new(); @@ -1331,7 +1340,8 @@ impl NetConfig { .add("ops_size") .add("ops_one_time_burst") .add("ops_refill_time") - .add("pci_segment"); + .add("pci_segment") + .add("addr"); parser.parse(net).map_err(Error::ParseNetwork)?; let tap = parser.get("tap"); @@ -1447,6 +1457,10 @@ impl NetConfig { None }; + let (bdf_device, _bdf_function) = parser + .get_pci_device_function() + 
.map_err(Error::ParsePciAddr)?; + let config = NetConfig { tap, ip, @@ -1467,6 +1481,7 @@ impl NetConfig { offload_tso, offload_ufo, offload_csum, + bdf_device, }; Ok(config) } @@ -1531,7 +1546,7 @@ impl NetConfig { impl RngConfig { pub fn parse(rng: &str) -> Result { let mut parser = OptionParser::new(); - parser.add("src").add("iommu"); + parser.add("src").add("iommu").add("addr"); parser.parse(rng).map_err(Error::ParseRng)?; let src = PathBuf::from( @@ -1545,19 +1560,27 @@ impl RngConfig { .unwrap_or(Toggle(false)) .0; - Ok(RngConfig { src, iommu }) + let (bdf_device, _bdf_function) = parser + .get_pci_device_function() + .map_err(Error::ParsePciAddr)?; + + Ok(RngConfig { + src, + iommu, + bdf_device, + }) } } impl BalloonConfig { pub const SYNTAX: &'static str = "Balloon parameters \"size=,deflate_on_oom=on|off,\ - free_page_reporting=on|off\""; + free_page_reporting=on|off,addr=\""; pub fn parse(balloon: &str) -> Result { let mut parser = OptionParser::new(); parser.add("size"); parser.add("deflate_on_oom"); - parser.add("free_page_reporting"); + parser.add("free_page_reporting").add("addr"); parser.parse(balloon).map_err(Error::ParseBalloon)?; let size = parser @@ -1578,10 +1601,15 @@ impl BalloonConfig { .unwrap_or(Toggle(false)) .0; + let (bdf_device, _bdf_function) = parser + .get_pci_device_function() + .map_err(Error::ParsePciAddr)?; + Ok(BalloonConfig { size, deflate_on_oom, free_page_reporting, + bdf_device, }) } } @@ -1589,7 +1617,8 @@ impl BalloonConfig { impl FsConfig { pub const SYNTAX: &'static str = "virtio-fs parameters \ \"tag=,socket=,num_queues=,\ - queue_size=,id=,pci_segment=\""; + queue_size=,id=,pci_segment=,\ + addr=\""; pub fn parse(fs: &str) -> Result { let mut parser = OptionParser::new(); @@ -1599,7 +1628,8 @@ impl FsConfig { .add("num_queues") .add("socket") .add("id") - .add("pci_segment"); + .add("pci_segment") + .add("addr"); parser.parse(fs).map_err(Error::ParseFileSystem)?; let tag = parser.get("tag").ok_or(Error::ParseFsTagMissing)?; @@ -1624,6 +1654,10 @@ impl FsConfig { .map_err(Error::ParseFileSystem)? .unwrap_or_default(); + let (bdf_device, _bdf_function) = parser + .get_pci_device_function() + .map_err(Error::ParsePciAddr)?; + Ok(FsConfig { tag, socket, @@ -1631,6 +1665,7 @@ impl FsConfig { queue_size, id, pci_segment, + bdf_device, }) } @@ -1756,7 +1791,7 @@ impl FwCfgItem { impl PmemConfig { pub const SYNTAX: &'static str = "Persistent memory parameters \ \"file=,size=,iommu=on|off,\ - discard_writes=on|off,id=,pci_segment=\""; + discard_writes=on|off,id=,pci_segment=,addr=\""; pub fn parse(pmem: &str) -> Result { let mut parser = OptionParser::new(); @@ -1766,7 +1801,8 @@ impl PmemConfig { .add("iommu") .add("discard_writes") .add("id") - .add("pci_segment"); + .add("pci_segment") + .add("addr"); parser.parse(pmem).map_err(Error::ParsePersistentMemory)?; let file = PathBuf::from(parser.get("file").ok_or(Error::ParsePmemFileMissing)?); @@ -1790,6 +1826,10 @@ impl PmemConfig { .map_err(Error::ParsePersistentMemory)? 
.unwrap_or_default(); + let (bdf_device, _bdf_function) = parser + .get_pci_device_function() + .map_err(Error::ParsePciAddr)?; + Ok(PmemConfig { file, size, @@ -1797,6 +1837,7 @@ impl PmemConfig { discard_writes, id, pci_segment, + bdf_device, }) } @@ -1829,7 +1870,8 @@ impl ConsoleConfig { .add("file") .add("iommu") .add("tcp") - .add("socket"); + .add("socket") + .add("addr"); parser.parse(console).map_err(Error::ParseConsole)?; let mut file: Option = default_consoleconfig_file(); @@ -1877,12 +1919,17 @@ impl ConsoleConfig { .unwrap_or(Toggle(false)) .0; + let (bdf_device, _bdf_function) = parser + .get_pci_device_function() + .map_err(Error::ParsePciAddr)?; + Ok(Self { file, mode, iommu, socket, url, + bdf_device, }) } } @@ -1942,7 +1989,8 @@ impl DebugConsoleConfig { } impl DeviceConfig { - pub const SYNTAX: &'static str = "Direct device assignment parameters \"path=,iommu=on|off,id=,pci_segment=\""; + pub const SYNTAX: &'static str = "Direct device assignment parameters \"\ + path=,iommu=on|off,id=,pci_segment=\""; pub fn parse(device: &str) -> Result { let mut parser = OptionParser::new(); @@ -2046,7 +2094,7 @@ impl UserDeviceConfig { impl VdpaConfig { pub const SYNTAX: &'static str = "vDPA device \ \"path=,num_queues=,iommu=on|off,\ - id=,pci_segment=\""; + id=,pci_segment=,addr=\""; pub fn parse(vdpa: &str) -> Result { let mut parser = OptionParser::new(); @@ -2055,7 +2103,8 @@ impl VdpaConfig { .add("num_queues") .add("iommu") .add("id") - .add("pci_segment"); + .add("pci_segment") + .add("addr"); parser.parse(vdpa).map_err(Error::ParseVdpa)?; let path = parser @@ -2077,12 +2126,17 @@ impl VdpaConfig { .map_err(Error::ParseVdpa)? .unwrap_or_default(); + let (bdf_device, _bdf_function) = parser + .get_pci_device_function() + .map_err(Error::ParsePciAddr)?; + Ok(VdpaConfig { path, num_queues, iommu, id, pci_segment, + bdf_device, }) } @@ -2106,7 +2160,8 @@ impl VdpaConfig { impl VsockConfig { pub const SYNTAX: &'static str = "Virtio VSOCK parameters \ - \"cid=,socket=,iommu=on|off,id=,pci_segment=\""; + \"cid=,socket=,iommu=on|off,id=,\ + pci_segment=,addr=\""; pub fn parse(vsock: &str) -> Result { let mut parser = OptionParser::new(); @@ -2115,7 +2170,8 @@ impl VsockConfig { .add("cid") .add("iommu") .add("id") - .add("pci_segment"); + .add("pci_segment") + .add("addr"); parser.parse(vsock).map_err(Error::ParseVsock)?; let socket = parser @@ -2137,12 +2193,17 @@ impl VsockConfig { .map_err(Error::ParseVsock)? 
.unwrap_or_default(); + let (bdf_device, _bdf_function) = parser + .get_pci_device_function() + .map_err(Error::ParsePciAddr)?; + Ok(VsockConfig { cid, socket, iommu, id, pci_segment, + bdf_device, }) } @@ -3453,6 +3514,7 @@ mod tests { pci_segment: 0, serial: None, queue_affinity: None, + bdf_device: None, } } @@ -3571,6 +3633,7 @@ mod tests { offload_tso: true, offload_ufo: true, offload_csum: true, + bdf_device: None, } } @@ -3653,6 +3716,7 @@ mod tests { RngConfig { src: PathBuf::from("/dev/random"), iommu: true, + bdf_device: None, } ); assert_eq!( @@ -3673,6 +3737,7 @@ mod tests { queue_size: 1024, id: None, pci_segment: 0, + bdf_device: None, } } @@ -3703,6 +3768,7 @@ mod tests { discard_writes: false, id: None, pci_segment: 0, + bdf_device: None, } } @@ -3746,6 +3812,7 @@ mod tests { file: None, socket: None, url: None, + bdf_device: None, } ); assert_eq!( @@ -3756,6 +3823,7 @@ mod tests { file: None, socket: None, url: None, + bdf_device: None, } ); assert_eq!( @@ -3766,6 +3834,7 @@ mod tests { file: None, socket: None, url: None, + bdf_device: None, } ); assert_eq!( @@ -3776,6 +3845,7 @@ mod tests { file: None, socket: None, url: None, + bdf_device: None, } ); assert_eq!( @@ -3786,6 +3856,7 @@ mod tests { file: Some(PathBuf::from("/tmp/console")), socket: None, url: None, + bdf_device: None, } ); assert_eq!( @@ -3796,6 +3867,7 @@ mod tests { file: None, socket: None, url: None, + bdf_device: None, } ); assert_eq!( @@ -3806,6 +3878,7 @@ mod tests { file: Some(PathBuf::from("/tmp/console")), socket: None, url: None, + bdf_device: None, } ); assert_eq!( @@ -3816,6 +3889,7 @@ mod tests { file: None, socket: Some(PathBuf::from("/tmp/serial.sock")), url: None, + bdf_device: None, } ); Ok(()) @@ -3867,6 +3941,7 @@ mod tests { iommu: false, id: None, pci_segment: 0, + bdf_device: None, } } @@ -3911,6 +3986,7 @@ mod tests { iommu: false, id: None, pci_segment: 0, + bdf_device: None, } ); assert_eq!( @@ -3921,6 +3997,7 @@ mod tests { iommu: true, id: None, pci_segment: 0, + bdf_device: None, } ); Ok(()) @@ -4173,6 +4250,7 @@ mod tests { rng: RngConfig { src: PathBuf::from("/dev/urandom"), iommu: false, + bdf_device: None, }, balloon: None, fs: None, @@ -4183,6 +4261,7 @@ mod tests { iommu: false, socket: None, url: None, + bdf_device: None, }, console: ConsoleConfig { file: None, @@ -4190,6 +4269,7 @@ mod tests { iommu: false, socket: None, url: None, + bdf_device: None, }, #[cfg(target_arch = "x86_64")] debug_console: DebugConsoleConfig::default(), @@ -4485,6 +4565,7 @@ mod tests { id: None, iommu: true, pci_segment: 1, + bdf_device: None, }); still_valid_config.validate().unwrap(); @@ -4561,6 +4642,7 @@ mod tests { id: None, iommu: false, pci_segment: 1, + bdf_device: None, }); assert_eq!( invalid_config.validate(), diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 87faefcca2..c4e45937f8 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -896,6 +896,7 @@ struct MetaVirtioDevice { iommu: bool, id: String, pci_segment: u16, + bdf_device: Option, dma_handler: Option>, } @@ -1632,6 +1633,7 @@ impl DeviceManager { handle.id, handle.pci_segment, handle.dma_handler, + handle.bdf_device, )?; if handle.iommu { @@ -1660,7 +1662,8 @@ impl DeviceManager { } if let Some(iommu_device) = iommu_device { - let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?; + let dev_id = + self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None, None)?; self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); } } @@ 
-2372,6 +2375,7 @@ impl DeviceManager { id: id.clone(), pci_segment: 0, dma_handler: None, + bdf_device: None, }); // Fill the device tree with a new node. In case of restore, we @@ -2819,6 +2823,7 @@ impl DeviceManager { id, pci_segment: disk_cfg.pci_segment, dma_handler: None, + bdf_device: disk_cfg.bdf_device, }) } @@ -2995,6 +3000,7 @@ impl DeviceManager { id, pci_segment: net_cfg.pci_segment, dma_handler: None, + bdf_device: net_cfg.bdf_device, }) } @@ -3040,6 +3046,7 @@ impl DeviceManager { id: id.clone(), pci_segment: 0, dma_handler: None, + bdf_device: None, }); // Fill the device tree with a new node. In case of restore, we @@ -3101,6 +3108,7 @@ impl DeviceManager { id, pci_segment: fs_cfg.pci_segment, dma_handler: None, + bdf_device: fs_cfg.bdf_device, }) } else { Err(DeviceManagerError::NoVirtioFsSock) @@ -3289,6 +3297,7 @@ impl DeviceManager { id, pci_segment: pmem_cfg.pci_segment, dma_handler: None, + bdf_device: pmem_cfg.bdf_device, }) } @@ -3360,6 +3369,7 @@ impl DeviceManager { id, pci_segment: vsock_cfg.pci_segment, dma_handler: None, + bdf_device: vsock_cfg.bdf_device, }) } @@ -3416,6 +3426,7 @@ impl DeviceManager { id: memory_zone_id.clone(), pci_segment: 0, dma_handler: None, + bdf_device: None, }); // Fill the device tree with a new node. In case of restore, we @@ -3442,7 +3453,7 @@ impl DeviceManager { let pci_segment_id = 0x0_u16; let (pci_segment_id, pci_device_bdf, resources) = - self.pci_resources(&id, pci_segment_id)?; + self.pci_resources(&id, pci_segment_id, None)?; info!("Creating pvmemcontrol device: id = {}", id); let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) = @@ -3503,6 +3514,7 @@ impl DeviceManager { id: id.clone(), pci_segment: 0, dma_handler: None, + bdf_device: balloon_config.bdf_device, }); self.device_tree @@ -3542,6 +3554,7 @@ impl DeviceManager { id: id.clone(), pci_segment: 0, dma_handler: None, + bdf_device: None, }); self.device_tree @@ -3600,6 +3613,7 @@ impl DeviceManager { id, pci_segment: vdpa_cfg.pci_segment, dma_handler: Some(vdpa_mapping), + bdf_device: vdpa_cfg.bdf_device, }) } @@ -3686,7 +3700,7 @@ impl DeviceManager { }; let (pci_segment_id, pci_device_bdf, resources) = - self.pci_resources(&vfio_name, device_cfg.pci_segment)?; + self.pci_resources(&vfio_name, device_cfg.pci_segment, None)?; let mut needs_dma_mapping = false; @@ -3923,7 +3937,7 @@ impl DeviceManager { }; let (pci_segment_id, pci_device_bdf, resources) = - self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?; + self.pci_resources(&vfio_user_name, device_cfg.pci_segment, None)?; let legacy_interrupt_group = if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { @@ -4035,6 +4049,7 @@ impl DeviceManager { virtio_device_id: String, pci_segment_id: u16, dma_handler: Option>, + bdf_device: Option, ) -> DeviceManagerResult { let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}"); @@ -4043,7 +4058,7 @@ impl DeviceManager { node.children = vec![virtio_device_id.clone()]; let (pci_segment_id, pci_device_bdf, resources) = - self.pci_resources(&id, pci_segment_id)?; + self.pci_resources(&id, pci_segment_id, bdf_device)?; // Update the existing virtio node by setting the parent. 
if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) { @@ -4180,7 +4195,7 @@ impl DeviceManager { info!("Creating pvpanic device {}", id); let (pci_segment_id, pci_device_bdf, resources) = - self.pci_resources(&id, pci_segment_id)?; + self.pci_resources(&id, pci_segment_id, None)?; let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); @@ -4218,7 +4233,7 @@ impl DeviceManager { info!("Creating ivshmem device {}", id); let (pci_segment_id, pci_device_bdf, resources) = - self.pci_resources(&id, pci_segment_id)?; + self.pci_resources(&id, pci_segment_id, None)?; let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); let ivshmem_ops = Arc::new(Mutex::new(IvshmemHandler { @@ -4263,6 +4278,7 @@ impl DeviceManager { &self, id: &str, pci_segment_id: u16, + pci_device_id: Option, ) -> DeviceManagerResult<(u16, PciBdf, Option>)> { // Look for the id in the device tree. If it can be found, that means // the device is being restored, otherwise it's created from scratch. @@ -4290,7 +4306,7 @@ impl DeviceManager { (pci_segment_id, pci_device_bdf, resources) } else { let pci_device_bdf = - self.pci_segments[pci_segment_id as usize].allocate_device_bdf(None)?; + self.pci_segments[pci_segment_id as usize].allocate_device_bdf(pci_device_id)?; (pci_segment_id, pci_device_bdf, None) }) @@ -4770,6 +4786,7 @@ impl DeviceManager { handle.id.clone(), handle.pci_segment, handle.dma_handler, + None, )?; // Update the PCIU bitmap diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 7ec33a23df..256976355f 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -3475,6 +3475,7 @@ mod unit_tests { rng: RngConfig { src: PathBuf::from("/dev/urandom"), iommu: false, + bdf_device: None, }, balloon: None, fs: None, @@ -3485,6 +3486,7 @@ mod unit_tests { iommu: false, socket: None, url: None, + bdf_device: None, }, console: ConsoleConfig { file: None, @@ -3493,6 +3495,7 @@ mod unit_tests { iommu: false, socket: None, url: None, + bdf_device: None, }, #[cfg(target_arch = "x86_64")] debug_console: DebugConsoleConfig::default(), diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index d7062e29ea..45bd382b9c 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -276,6 +276,8 @@ pub struct DiskConfig { pub serial: Option, #[serde(default)] pub queue_affinity: Option>, + #[serde(default)] + pub bdf_device: Option, } impl ApplyLandlock for DiskConfig { @@ -341,6 +343,8 @@ pub struct NetConfig { pub offload_ufo: bool, #[serde(default = "default_netconfig_true")] pub offload_csum: bool, + #[serde(default)] + pub bdf_device: Option, } pub fn default_netconfig_true() -> bool { @@ -401,6 +405,8 @@ pub struct RngConfig { pub src: PathBuf, #[serde(default)] pub iommu: bool, + #[serde(default)] + pub bdf_device: Option, } pub const DEFAULT_RNG_SOURCE: &str = "/dev/urandom"; @@ -410,6 +416,7 @@ impl Default for RngConfig { RngConfig { src: PathBuf::from(DEFAULT_RNG_SOURCE), iommu: false, + bdf_device: None, } } } @@ -431,6 +438,8 @@ pub struct BalloonConfig { /// Option to enable free page reporting from the guest. 
#[serde(default)] pub free_page_reporting: bool, + #[serde(default)] + pub bdf_device: Option, } #[cfg(feature = "pvmemcontrol")] @@ -449,6 +458,8 @@ pub struct FsConfig { pub id: Option, #[serde(default)] pub pci_segment: u16, + #[serde(default)] + pub bdf_device: Option, } pub fn default_fsconfig_num_queues() -> usize { @@ -479,6 +490,8 @@ pub struct PmemConfig { pub id: Option, #[serde(default)] pub pci_segment: u16, + #[serde(default)] + pub bdf_device: Option, } impl ApplyLandlock for PmemConfig { @@ -509,6 +522,9 @@ pub struct ConsoleConfig { pub iommu: bool, pub socket: Option, pub url: Option, + /// PCI BDF to attach the console in the guest to + #[serde(default)] + pub bdf_device: Option, } pub fn default_consoleconfig_file() -> Option { @@ -614,6 +630,8 @@ pub struct VdpaConfig { pub id: Option, #[serde(default)] pub pci_segment: u16, + #[serde(default)] + pub bdf_device: Option, } pub fn default_vdpaconfig_num_queues() -> usize { @@ -637,6 +655,8 @@ pub struct VsockConfig { pub id: Option, #[serde(default)] pub pci_segment: u16, + #[serde(default)] + pub bdf_device: Option, } impl ApplyLandlock for VsockConfig { @@ -859,6 +879,7 @@ pub fn default_serial() -> ConsoleConfig { iommu: false, socket: None, url: None, + bdf_device: None, } } @@ -869,6 +890,7 @@ pub fn default_console() -> ConsoleConfig { iommu: false, socket: None, url: None, + bdf_device: None, } } From ad23de4f706ac5a0ee7ca6bb9c8102e422dbd791 Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Wed, 3 Dec 2025 12:15:21 +0100 Subject: [PATCH 271/294] vmm: Add tests for BDF device address parsing in configs Signed-off-by: Pascal Scholz On-behalf-of: SAP pascal.scholz@sap.com --- src/main.rs | 52 +++++++++++++++++++++++++++++++++--------- vmm/src/config.rs | 57 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 10 deletions(-) diff --git a/src/main.rs b/src/main.rs index 97246e0126..87a13805a9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1214,6 +1214,24 @@ mod unit_tests { }"#, true, ), + ( + vec![ + "cloud-hypervisor", + "--kernel", + "/path/to/kernel", + "--disk", + "path=/path/to/disk/1,addr=15.0", + "path=/path/to/disk/2", + ], + r#"{ + "payload": {"kernel": "/path/to/kernel"}, + "disks": [ + {"path": "/path/to/disk/1", "bdf_device": 21}, + {"path": "/path/to/disk/2"} + ] + }"#, + true, + ), ( vec![ "cloud-hypervisor", @@ -1425,6 +1443,20 @@ mod unit_tests { }"#, true, ), + ( + vec![ + "cloud-hypervisor", "--kernel", "/path/to/kernel", + "--net", + "mac=12:34:56:78:90:ab,host_mac=34:56:78:90:ab:cd,tap=tap0,ip=1.2.3.4,mask=5.6.7.8,addr=08.0", + ], + r#"{ + "payload": {"kernel": "/path/to/kernel"}, + "net": [ + {"mac": "12:34:56:78:90:ab", "host_mac": "34:56:78:90:ab:cd", "tap": "tap0", "ip": "1.2.3.4", "mask": "5.6.7.8", "num_queues": 2, "queue_size": 256, "bdf_device": 8} + ] + }"#, + true, + ), #[cfg(target_arch = "x86_64")] ( vec![ @@ -1496,11 +1528,11 @@ mod unit_tests { "--kernel", "/path/to/kernel", "--rng", - "src=/path/to/entropy/source", + "src=/path/to/entropy/source,addr=11.0", ], r#"{ "payload": {"kernel": "/path/to/kernel"}, - "rng": {"src": "/path/to/entropy/source"} + "rng": {"src": "/path/to/entropy/source", "bdf_device": 17} }"#, true, )] @@ -1517,14 +1549,14 @@ mod unit_tests { "cloud-hypervisor", "--kernel", "/path/to/kernel", "--memory", "shared=true", "--fs", - "tag=virtiofs1,socket=/path/to/sock1", + "tag=virtiofs1,socket=/path/to/sock1,addr=10.0", "tag=virtiofs2,socket=/path/to/sock2", ], r#"{ "payload": {"kernel": "/path/to/kernel"}, "memory" : { "shared": 
true, "size": 536870912 }, "fs": [ - {"tag": "virtiofs1", "socket": "/path/to/sock1"}, + {"tag": "virtiofs1", "socket": "/path/to/sock1", "bdf_device": 16}, {"tag": "virtiofs2", "socket": "/path/to/sock2"} ] }"#, @@ -1596,13 +1628,13 @@ mod unit_tests { "--kernel", "/path/to/kernel", "--pmem", - "file=/path/to/img/1,size=1G", + "file=/path/to/img/1,size=1G,addr=1F.0", "file=/path/to/img/2,size=2G", ], r#"{ "payload": {"kernel": "/path/to/kernel"}, "pmem": [ - {"file": "/path/to/img/1", "size": 1073741824}, + {"file": "/path/to/img/1", "size": 1073741824,"bdf_device": 31}, {"file": "/path/to/img/2", "size": 2147483648} ] }"#, @@ -1880,13 +1912,13 @@ mod unit_tests { "--kernel", "/path/to/kernel", "--vdpa", - "path=/path/to/device/1", + "path=/path/to/device/1,addr=18.0", "path=/path/to/device/2,num_queues=2", ], r#"{ "payload": {"kernel": "/path/to/kernel"}, "vdpa": [ - {"path": "/path/to/device/1", "num_queues": 1}, + {"path": "/path/to/device/1", "num_queues": 1, "bdf_device": 24}, {"path": "/path/to/device/2", "num_queues": 2} ] }"#, @@ -1925,11 +1957,11 @@ mod unit_tests { "--kernel", "/path/to/kernel", "--vsock", - "cid=123,socket=/path/to/sock/1", + "cid=123,socket=/path/to/sock/1,addr=0F.0", ], r#"{ "payload": {"kernel": "/path/to/kernel"}, - "vsock": {"cid": 123, "socket": "/path/to/sock/1"} + "vsock": {"cid": 123, "socket": "/path/to/sock/1", "bdf_device": 15} }"#, true, ), diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 1b37f63328..2ca2505814 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -3609,6 +3609,13 @@ mod tests { ..disk_fixture() } ); + assert_eq!( + DiskConfig::parse("path=/path/to_file,addr=15.0")?, + DiskConfig { + bdf_device: Some(21), + ..disk_fixture() + } + ); Ok(()) } @@ -3698,6 +3705,14 @@ mod tests { } ); + assert_eq!( + NetConfig::parse("mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,addr=08.0")?, + NetConfig { + bdf_device: Some(8), + ..net_fixture() + } + ); + Ok(()) } @@ -3726,6 +3741,13 @@ mod tests { ..Default::default() } ); + assert_eq!( + RngConfig::parse("addr=10.0")?, + RngConfig { + bdf_device: Some(16), + ..Default::default() + } + ); Ok(()) } @@ -3757,6 +3779,14 @@ mod tests { } ); + assert_eq!( + FsConfig::parse("tag=mytag,socket=/tmp/sock,addr=0F.0")?, + FsConfig { + bdf_device: Some(15), + ..fs_fixture() + } + ); + Ok(()) } @@ -3796,6 +3826,13 @@ mod tests { ..pmem_fixture() } ); + assert_eq!( + PmemConfig::parse("file=/tmp/pmem,size=128M,addr=1F.0")?, + PmemConfig { + bdf_device: Some(31), + ..pmem_fixture() + } + ); Ok(()) } @@ -3958,6 +3995,13 @@ mod tests { ..vdpa_fixture() } ); + assert_eq!( + VdpaConfig::parse("path=/dev/vhost-vdpa,addr=0A.0")?, + VdpaConfig { + bdf_device: Some(10), + ..vdpa_fixture() + } + ); Ok(()) } @@ -4000,6 +4044,18 @@ mod tests { bdf_device: None, } ); + + assert_eq!( + VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on,addr=08.0")?, + VsockConfig { + cid: 3, + socket: PathBuf::from("/tmp/sock"), + iommu: true, + id: None, + pci_segment: 0, + bdf_device: Some(8), + } + ); Ok(()) } @@ -4077,6 +4133,7 @@ mod tests { id: Some("net0".to_owned()), num_queues: 2, fds: Some(vec![-1, -1, -1, -1]), + bdf_device: Some(15), ..net_fixture() }, NetConfig { From b0b08e383129ebb8db5b27e287263e8614abe82e Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Mon, 17 Nov 2025 14:27:39 +0100 Subject: [PATCH 272/294] vmm: Use device ID part of bdf for hotplugging Signed-off-by: Pascal Scholz On-behalf-of: SAP pascal.scholz@sap.com --- vmm/src/device_manager.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index c4e45937f8..8b8ba09e5d 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -4786,7 +4786,7 @@ impl DeviceManager { handle.id.clone(), handle.pci_segment, handle.dma_handler, - None, + handle.bdf_device, )?; // Update the PCIU bitmap From 48b8abfec081e3befbc7e4ac074b05d1cbb81135 Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Wed, 3 Dec 2025 12:18:56 +0100 Subject: [PATCH 273/294] vmm: Use `VecDeque` to collect virtio devices We use `VecDeque` to sort devices implicitly. Devices whose config contains a fixed BDF are added to the front, while those without a BDF given are added to the back. Processing the `VecDeque` sequentially from first to last then ensures that no clashes occur when assigning BDFs to devices. Otherwise, we could end up in the case that we assigned a BDF required by one device's config to one without a BDF. Signed-off-by: Pascal Scholz On-behalf-of: SAP pascal.scholz@sap.com --- vmm/src/device_manager.rs | 81 +++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 20 deletions(-) diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 8b8ba09e5d..4206aa155d 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -9,7 +9,7 @@ // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause // -use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque}; use std::fs::{File, OpenOptions}; use std::io::{self, IsTerminal, Seek, SeekFrom, stdout}; use std::num::Wrapping; @@ -982,7 +982,7 @@ pub struct DeviceManager { cpu_manager: Arc>, // The virtio devices on the system - virtio_devices: Vec, + virtio_devices: VecDeque, /// All disks. Needed for locking and unlocking the images. block_devices: Vec>>, @@ -1321,7 +1321,7 @@ impl DeviceManager { config, memory_manager, cpu_manager, - virtio_devices: Vec::new(), + virtio_devices: VecDeque::new(), block_devices: vec![], bus_devices: Vec::new(), device_id_cnt, @@ -2368,15 +2368,21 @@ impl DeviceManager { ) .map_err(DeviceManagerError::CreateVirtioConsole)?; let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); - self.virtio_devices.push(MetaVirtioDevice { + let device = MetaVirtioDevice { virtio_device: Arc::clone(&virtio_console_device) as Arc>, iommu: console_config.iommu, id: id.clone(), pci_segment: 0, dma_handler: None, - bdf_device: None, - }); + bdf_device: console_config.bdf_device, + }; + + if console_config.bdf_device.is_some() { + self.virtio_devices.push_front(device); + } else { + self.virtio_devices.push_back(device); + } // Fill the device tree with a new node. 
In case of restore, we // know there is nothing to do, so we can simply override the @@ -2832,7 +2838,11 @@ impl DeviceManager { if let Some(disk_list_cfg) = &mut block_devices { for disk_cfg in disk_list_cfg.iter_mut() { let device = self.make_virtio_block_device(disk_cfg, false)?; - self.virtio_devices.push(device); + if disk_cfg.bdf_device.is_some() { + self.virtio_devices.push_front(device); + } else { + self.virtio_devices.push_back(device); + } } } self.config.lock().unwrap().disks = block_devices; @@ -3010,7 +3020,11 @@ impl DeviceManager { if let Some(net_list_cfg) = &mut net_devices { for net_cfg in net_list_cfg.iter_mut() { let device = self.make_virtio_net_device(net_cfg)?; - self.virtio_devices.push(device); + if net_cfg.bdf_device.is_some() { + self.virtio_devices.push_front(device); + } else { + self.virtio_devices.push_back(device); + } } } self.config.lock().unwrap().net = net_devices; @@ -3039,15 +3053,20 @@ impl DeviceManager { ) .map_err(DeviceManagerError::CreateVirtioRng)?, )); - self.virtio_devices.push(MetaVirtioDevice { + let device = MetaVirtioDevice { virtio_device: Arc::clone(&virtio_rng_device) as Arc>, iommu: rng_config.iommu, id: id.clone(), pci_segment: 0, dma_handler: None, - bdf_device: None, - }); + bdf_device: rng_config.bdf_device, + }; + if rng_config.bdf_device.is_some() { + self.virtio_devices.push_front(device); + } else { + self.virtio_devices.push_back(device); + } // Fill the device tree with a new node. In case of restore, we // know there is nothing to do, so we can simply override the @@ -3120,7 +3139,11 @@ impl DeviceManager { if let Some(fs_list_cfg) = &mut fs_devices { for fs_cfg in fs_list_cfg.iter_mut() { let device = self.make_virtio_fs_device(fs_cfg)?; - self.virtio_devices.push(device); + if fs_cfg.bdf_device.is_some() { + self.virtio_devices.push_front(device); + } else { + self.virtio_devices.push_back(device); + } } } self.config.lock().unwrap().fs = fs_devices; @@ -3307,7 +3330,11 @@ impl DeviceManager { if let Some(pmem_list_cfg) = &mut pmem_devices { for pmem_cfg in pmem_list_cfg.iter_mut() { let device = self.make_virtio_pmem_device(pmem_cfg)?; - self.virtio_devices.push(device); + if pmem_cfg.bdf_device.is_some() { + self.virtio_devices.push_front(device); + } else { + self.virtio_devices.push_back(device); + } } } self.config.lock().unwrap().pmem = pmem_devices; @@ -3377,7 +3404,11 @@ impl DeviceManager { let mut vsock = self.config.lock().unwrap().vsock.clone(); if let Some(vsock_cfg) = &mut vsock { let device = self.make_virtio_vsock_device(vsock_cfg)?; - self.virtio_devices.push(device); + if vsock_cfg.bdf_device.is_some() { + self.virtio_devices.push_front(device); + } else { + self.virtio_devices.push_back(device); + } } self.config.lock().unwrap().vsock = vsock; @@ -3419,7 +3450,7 @@ impl DeviceManager { self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); - self.virtio_devices.push(MetaVirtioDevice { + self.virtio_devices.push_back(MetaVirtioDevice { virtio_device: Arc::clone(&virtio_mem_device) as Arc>, iommu: false, @@ -3507,7 +3538,7 @@ impl DeviceManager { self.balloon = Some(virtio_balloon_device.clone()); - self.virtio_devices.push(MetaVirtioDevice { + let device = MetaVirtioDevice { virtio_device: Arc::clone(&virtio_balloon_device) as Arc>, iommu: false, @@ -3515,7 +3546,13 @@ impl DeviceManager { pci_segment: 0, dma_handler: None, bdf_device: balloon_config.bdf_device, - }); + }; + + if balloon_config.bdf_device.is_some() { + self.virtio_devices.push_front(device); + } else { + 
self.virtio_devices.push_back(device); + } self.device_tree .lock() @@ -3547,7 +3584,7 @@ impl DeviceManager { ) .map_err(DeviceManagerError::CreateVirtioWatchdog)?, )); - self.virtio_devices.push(MetaVirtioDevice { + self.virtio_devices.push_back(MetaVirtioDevice { virtio_device: Arc::clone(&virtio_watchdog_device) as Arc>, iommu: false, @@ -3623,7 +3660,11 @@ impl DeviceManager { if let Some(vdpa_list_cfg) = &mut vdpa_devices { for vdpa_cfg in vdpa_list_cfg.iter_mut() { let device = self.make_vdpa_device(vdpa_cfg)?; - self.virtio_devices.push(device); + if vdpa_cfg.bdf_device.is_some() { + self.virtio_devices.push_front(device); + } else { + self.virtio_devices.push_back(device); + } } } self.config.lock().unwrap().vdpa = vdpa_devices; @@ -4772,7 +4813,7 @@ impl DeviceManager { // Add the virtio device to the device manager list. This is important // as the list is used to notify virtio devices about memory updates // for instance. - self.virtio_devices.push(handle.clone()); + self.virtio_devices.push_back(handle.clone()); let mapping: Option> = if handle.iommu { self.iommu_mapping.clone() From 21d17bf01b64d07f54e9a11b1ba8751b74e31e85 Mon Sep 17 00:00:00 2001 From: Sebastian Eydam Date: Wed, 5 Nov 2025 14:24:01 +0100 Subject: [PATCH 274/294] vm-migration: add the client side of the TLS connection TLS connections have a TLS server (the endpoint that listens for a connection) and a TLS client (the endpoint that initiates the connection). This commit adds the code for the client side, which will be the source host. On-behalf-of: SAP sebastian.eydam@sap.com Signed-off-by: Sebastian Eydam --- Cargo.lock | 227 +++++++++++++++++++++++++++++++++++++++- Cargo.toml | 2 + vm-migration/Cargo.toml | 1 + vm-migration/src/lib.rs | 4 + vm-migration/src/tls.rs | 136 ++++++++++++++++++++++++ 5 files changed, 368 insertions(+), 2 deletions(-) create mode 100644 vm-migration/src/tls.rs diff --git a/Cargo.lock b/Cargo.lock index 1989a0e18b..92b01e42c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -264,6 +264,29 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "aws-lc-rs" +version = "1.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879b6c89592deb404ba4dc0ae6b58ffd1795c78991cbb5b8bc441c48a070440d" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.32.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "107a4e9d9cab9963e04e84bb8dee0e25f2a987f9a8bad5ed054abd439caa8f8c" +dependencies = [ + "bindgen", + "cc", + "cmake", + "dunce", + "fs_extra", +] + [[package]] name = "backtrace" version = "0.3.75" @@ -279,6 +302,26 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags 2.9.4", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 2.1.1", + "shlex", + "syn", +] + [[package]] name = "bitfield-struct" version = "0.10.1" @@ -354,9 +397,20 @@ version = "1.2.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" dependencies = [ + "jobserver", + "libc", "shlex", ] +[[package]] +name = "cexpr" 
+version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -369,6 +423,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.5.47" @@ -414,6 +479,7 @@ dependencies = [ "log", "net_util", "option_parser", + "rustls", "seccompiler", "serde_json", "signal-hook", @@ -428,6 +494,15 @@ dependencies = [ "zbus", ] +[[package]] +name = "cmake" +version = "0.1.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +dependencies = [ + "cc", +] + [[package]] name = "colorchoice" version = "1.0.3" @@ -556,7 +631,7 @@ dependencies = [ "lazy_static", "mintex", "parking_lot", - "rustc-hash", + "rustc-hash 1.1.0", "serde", "serde_json", "thousands", @@ -583,6 +658,12 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "either" version = "1.15.0" @@ -726,6 +807,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "futures" version = "0.3.31" @@ -1017,6 +1104,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -1056,6 +1152,16 @@ dependencies = [ "syn", ] +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.3", + "libc", +] + [[package]] name = "js-sys" version = "0.3.77" @@ -1112,6 +1218,16 @@ version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libredox" version = "0.1.3" @@ -1214,6 +1330,12 @@ dependencies = [ "vmm-sys-util", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -1312,6 +1434,16 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43794a0ace135be66a25d3ae77d41b91615fb68ae937f904090203e81f755b65" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1649,6 +1781,16 @@ dependencies = [ "zerocopy 0.7.35", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro-crate" version = "3.3.0" @@ -1788,6 +1930,20 @@ dependencies = [ "syn", ] +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.15", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rustc-demangle" version = "0.1.26" @@ -1800,6 +1956,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustix" version = "0.38.44" @@ -1826,6 +1988,42 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "rustls" +version = "0.23.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7" +dependencies = [ + "aws-lc-rs", + "log", + "once_cell", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10b3f4191e8a80e6b43eebabfac91e5dcecebb27a71f04e820c47ec41d314bf" +dependencies = [ + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -1993,6 +2191,12 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name = "syn" version = "2.0.106" @@ -2174,6 +2378,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "utf8parse" version = "0.2.2" @@ -2402,7 +2612,8 @@ name = "vm-migration" version = "0.1.0" dependencies = [ "anyhow", - "itertools", + "itertools 0.14.0", + "rustls", "serde", "serde_json", "thiserror 2.0.12", @@ -2592,6 +2803,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-sys" version = "0.52.0" @@ -2923,6 +3140,12 @@ dependencies = [ "syn", ] +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + [[package]] name = "zvariant" version = "5.7.0" diff --git a/Cargo.toml b/Cargo.toml index 01e904b40a..b9599a045e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,6 +53,7 @@ hypervisor = { path = "hypervisor" } libc = { workspace = true } log = { workspace = true, features = ["std"] } option_parser = { path = "option_parser" } +rustls = { workspace = true } seccompiler = { workspace = true } serde_json = { workspace = true } signal-hook = { workspace = true } @@ -165,6 +166,7 @@ flume = "0.11.1" itertools = "0.14.0" libc = "0.2.167" log = "0.4.22" +rustls = "0.23.34" signal-hook = "0.3.18" thiserror = "2.0.12" uuid = { version = "1.18.1" } diff --git a/vm-migration/Cargo.toml b/vm-migration/Cargo.toml index b17475065c..2053afc472 100644 --- a/vm-migration/Cargo.toml +++ b/vm-migration/Cargo.toml @@ -7,6 +7,7 @@ version = "0.1.0" [dependencies] anyhow = { workspace = true } itertools = { workspace = true } +rustls = { workspace = true } serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true } thiserror = { workspace = true } diff --git a/vm-migration/src/lib.rs b/vm-migration/src/lib.rs index 07f409f8d0..daaa5d0d53 100644 --- a/vm-migration/src/lib.rs +++ b/vm-migration/src/lib.rs @@ -11,6 +11,7 @@ use crate::protocol::MemoryRangeTable; mod bitpos_iterator; pub mod protocol; +pub mod tls; #[derive(Error, Debug)] pub enum MigratableError { @@ -52,6 +53,9 @@ pub enum MigratableError { #[error("Failed to release a disk lock")] UnlockError(#[source] anyhow::Error), + + #[error("TLS error")] + Tls(#[from] tls::TlsError), } /// A Pausable component can be paused and resumed. 
diff --git a/vm-migration/src/tls.rs b/vm-migration/src/tls.rs new file mode 100644 index 0000000000..e2417f2b2b --- /dev/null +++ b/vm-migration/src/tls.rs @@ -0,0 +1,136 @@ +// Copyright © 2025 Cyberus Technology GmbH +// +// SPDX-License-Identifier: Apache-2.0 +// +use std::io::{self, Read, Write}; +use std::net::TcpStream; +use std::os::fd::{AsFd, BorrowedFd}; +use std::path::Path; +use std::sync::Arc; + +use rustls::pki_types::pem::PemObject; +use rustls::pki_types::{CertificateDer, InvalidDnsNameError, ServerName}; +use rustls::{ClientConfig, ClientConnection, RootCertStore, StreamOwned}; +use thiserror::Error; +use vm_memory::bitmap::BitmapSlice; +use vm_memory::io::{ReadVolatile, WriteVolatile}; +use vm_memory::{VolatileMemoryError, VolatileSlice}; + +use crate::MigratableError; + +#[derive(Error, Debug)] +pub enum TlsError { + #[error( + "The provided input could not be parsed because it is not a syntactically-valid DNS Name." + )] + InvalidDnsName(#[source] InvalidDnsNameError), + + #[error("Rustls protocol error")] + RustlsError(#[from] rustls::Error), + + #[error("Rustls protocol IO error")] + RustlsIoError(#[from] std::io::Error), + + #[error("Error during TLS handshake: {0}")] + HandshakeError(String), +} + +// This TlsStream will be later encapsulated in a SocketStream. Thus it has to +// implement the same traits. It is important that we never directly read from +// or write to the TcpStream encapsulated in StreamOwned. +#[derive(Debug)] +pub enum TlsStream { + Client(StreamOwned), +} + +impl Read for TlsStream { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + TlsStream::Client(s) => s.read(buf), + } + } +} + +impl Write for TlsStream { + fn write(&mut self, buf: &[u8]) -> io::Result { + match self { + TlsStream::Client(s) => s.write(buf), + } + } + fn flush(&mut self) -> io::Result<()> { + match self { + TlsStream::Client(s) => s.flush(), + } + } +} + +// Reading from or writing to these FDs would break the connection, because +// those reads or writes wouldn't go through rustls. But the FD is used to wait +// until it becomes readable. 
+impl AsFd for TlsStream { + fn as_fd(&self) -> BorrowedFd<'_> { + match self { + TlsStream::Client(s) => s.get_ref().as_fd(), + } + } +} + +impl ReadVolatile for TlsStream { + fn read_volatile( + &mut self, + vs: &mut VolatileSlice, + ) -> std::result::Result { + let mut tmp = vec![0u8; vs.len()]; + let n = Read::read(self, &mut tmp[..]).unwrap(); + vs.copy_from(&tmp[..n]); + Ok(n) + } +} + +impl WriteVolatile for TlsStream { + fn write_volatile( + &mut self, + vs: &VolatileSlice, + ) -> std::result::Result { + let mut tmp = vec![0u8; vs.len()]; + let n = vs.copy_to(&mut tmp[..]); + let n = Write::write(self, &tmp[..n]).unwrap(); + Ok(n) + } +} + +pub fn client_stream( + socket: TcpStream, + cert_dir: &Path, + hostname: &str, +) -> std::result::Result, MigratableError> { + let mut root_store = RootCertStore::empty(); + root_store.add_parsable_certificates( + CertificateDer::pem_file_iter(cert_dir.join("ca-cert.pem")) + .expect("Cannot open CA file") + .map(|result| result.unwrap()), + ); + let config = ClientConfig::builder() + .with_root_certificates(root_store) + .with_no_client_auth(); + let config = Arc::new(config); + let server_name = + ServerName::try_from(hostname.to_string()).map_err(TlsError::InvalidDnsName)?; + let conn = ClientConnection::new(config.clone(), server_name.clone()) + .map_err(TlsError::RustlsError)?; + + let mut tls = StreamOwned::new(conn, socket); + while tls.conn.is_handshaking() { + let (rd, wr) = tls + .conn + .complete_io(&mut tls.sock) + .map_err(TlsError::RustlsIoError)?; + if rd == 0 && wr == 0 { + Err(TlsError::HandshakeError( + "EOF during TLS handshake".to_string(), + ))?; + } + } + + Ok(tls) +} From e1bbd54f453fe29bb65f59ba0beb9eb60d49f569 Mon Sep 17 00:00:00 2001 From: Sebastian Eydam Date: Wed, 5 Nov 2025 14:25:28 +0100 Subject: [PATCH 275/294] vm-migration: add the server side of the TLS encryption This is the TLS server side, which will be the live migration target. 
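
As a rough illustration of how the target (server) side is meant to be driven — the
listener address and certificate directory below are placeholders, not part of this
patch, while the wrapper and stream types are the ones added here:

    use std::net::TcpListener;
    use std::path::Path;
    use vm_migration::tls::TlsConnectionWrapper;

    // `cert_dir` must contain server-cert.pem and server-key.pem.
    let wrapper = TlsConnectionWrapper::new(Path::new("/path/to/certs"));
    let listener = TcpListener::bind("0.0.0.0:6000")?;
    let (tcp, _peer) = listener.accept()?;
    // wrap() drives the TLS handshake and returns a TlsStream::Server; all
    // subsequent reads and writes must go through it, never through the raw
    // TcpStream.
    let tls = wrapper.wrap(tcp)?;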
On-behalf-of: SAP sebastian.eydam@sap.com Signed-off-by: Sebastian Eydam --- vm-migration/src/tls.rs | 55 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/vm-migration/src/tls.rs b/vm-migration/src/tls.rs index e2417f2b2b..50fece3ad3 100644 --- a/vm-migration/src/tls.rs +++ b/vm-migration/src/tls.rs @@ -9,8 +9,10 @@ use std::path::Path; use std::sync::Arc; use rustls::pki_types::pem::PemObject; -use rustls::pki_types::{CertificateDer, InvalidDnsNameError, ServerName}; -use rustls::{ClientConfig, ClientConnection, RootCertStore, StreamOwned}; +use rustls::pki_types::{CertificateDer, InvalidDnsNameError, PrivateKeyDer, ServerName}; +use rustls::{ + ClientConfig, ClientConnection, RootCertStore, ServerConfig, ServerConnection, StreamOwned, +}; use thiserror::Error; use vm_memory::bitmap::BitmapSlice; use vm_memory::io::{ReadVolatile, WriteVolatile}; @@ -41,12 +43,14 @@ pub enum TlsError { #[derive(Debug)] pub enum TlsStream { Client(StreamOwned), + Server(StreamOwned), } impl Read for TlsStream { fn read(&mut self, buf: &mut [u8]) -> io::Result { match self { TlsStream::Client(s) => s.read(buf), + TlsStream::Server(s) => s.read(buf), } } } @@ -55,11 +59,13 @@ impl Write for TlsStream { fn write(&mut self, buf: &[u8]) -> io::Result { match self { TlsStream::Client(s) => s.write(buf), + TlsStream::Server(s) => s.write(buf), } } fn flush(&mut self) -> io::Result<()> { match self { TlsStream::Client(s) => s.flush(), + TlsStream::Server(s) => s.flush(), } } } @@ -71,6 +77,7 @@ impl AsFd for TlsStream { fn as_fd(&self) -> BorrowedFd<'_> { match self { TlsStream::Client(s) => s.get_ref().as_fd(), + TlsStream::Server(s) => s.get_ref().as_fd(), } } } @@ -99,6 +106,50 @@ impl WriteVolatile for TlsStream { } } +// A small wrapper to be put into ReceiveListener::Tls. It carries the +// TLS-Config and creates a TlsStream after the TcpConnection accepted a +// connection. 
+#[derive(Debug, Clone)] +pub struct TlsConnectionWrapper { + config: Arc, +} + +impl TlsConnectionWrapper { + pub fn new(cert_dir: &Path) -> Self { + let certs = CertificateDer::pem_file_iter(cert_dir.join("server-cert.pem")) + .unwrap() + .map(|cert| cert.unwrap()) + .collect(); + let key = PrivateKeyDer::from_pem_file(cert_dir.join("server-key.pem")).unwrap(); + let config = ServerConfig::builder() + .with_no_client_auth() + .with_single_cert(certs, key) + .map_err(TlsError::RustlsError) + .unwrap(); + let config = Arc::new(config); + Self { config } + } + + pub fn wrap(&self, socket: TcpStream) -> std::result::Result { + let conn = ServerConnection::new(self.config.clone()).map_err(TlsError::RustlsError)?; + + let mut tls = StreamOwned::new(conn, socket); + while tls.conn.is_handshaking() { + let (rd, wr) = tls + .conn + .complete_io(&mut tls.sock) + .map_err(TlsError::RustlsIoError)?; + if rd == 0 && wr == 0 { + Err(TlsError::HandshakeError( + "EOF during TLS handshake".to_string(), + ))?; + } + } + + Ok(TlsStream::Server(tls)) + } +} + pub fn client_stream( socket: TcpStream, cert_dir: &Path, From 7fd335b14bb4701e676d4390b8e55325e96c8777 Mon Sep 17 00:00:00 2001 From: Sebastian Eydam Date: Wed, 5 Nov 2025 14:29:56 +0100 Subject: [PATCH 276/294] vmm: remove AsRawFd trait for SocketStream, as it seems unnecessary Also it seems like AsRawFd should be avoided https://rust-lang.github.io/rfcs/3128-io-safety.html On-behalf-of: SAP sebastian.eydam@sap.com Signed-off-by: Sebastian Eydam --- vmm/src/lib.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 256976355f..008fa80acd 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -303,15 +303,6 @@ impl AsFd for SocketStream { } } -impl AsRawFd for SocketStream { - fn as_raw_fd(&self) -> RawFd { - match self { - SocketStream::Unix(s) => s.as_raw_fd(), - SocketStream::Tcp(s) => s.as_raw_fd(), - } - } -} - impl ReadVolatile for SocketStream { fn read_volatile( &mut self, From e770de7b2041c7980aa1cfdf43c4e95851794cb9 Mon Sep 17 00:00:00 2001 From: Sebastian Eydam Date: Wed, 5 Nov 2025 14:30:42 +0100 Subject: [PATCH 277/294] vmm: add TLS variants to SocketStream and ReceiveListener This allows (more or less) transparent usage of TLS encrypted TCP connections. 
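
A rough sketch of how the new variants slot in (variable names such as
tcp_listener, cert_dir, tcp and hostname are placeholders; the enum variants and
helpers are the ones introduced by this series):

    // Target side: pair the TCP listener with the TLS server config so every
    // accepted connection is wrapped before it is used.
    let listener = ReceiveListener::Tls(tcp_listener, TlsConnectionWrapper::new(cert_dir));

    // Source side: an already-handshaked client stream travels through the
    // same SocketStream enum as the Unix and Tcp variants.
    let tls = tls::client_stream(tcp, cert_dir, hostname)?;
    let stream = SocketStream::Tls(TlsStream::Client(tls));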
On-behalf-of: SAP sebastian.eydam@sap.com Signed-off-by: Sebastian Eydam --- vmm/src/lib.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 008fa80acd..03a768416f 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -59,6 +59,7 @@ use vm_memory::{ VolatileMemoryError, VolatileSlice, WriteVolatile, }; use vm_migration::protocol::*; +use vm_migration::tls::{TlsConnectionWrapper, TlsStream}; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::signal::unblock_signal; @@ -267,6 +268,7 @@ impl From for EpollDispatch { enum SocketStream { Unix(UnixStream), Tcp(TcpStream), + Tls(TlsStream), } impl Read for SocketStream { @@ -274,6 +276,7 @@ impl Read for SocketStream { match self { SocketStream::Unix(stream) => stream.read(buf), SocketStream::Tcp(stream) => stream.read(buf), + SocketStream::Tls(stream) => stream.read(buf), } } } @@ -283,6 +286,7 @@ impl Write for SocketStream { match self { SocketStream::Unix(stream) => stream.write(buf), SocketStream::Tcp(stream) => stream.write(buf), + SocketStream::Tls(stream) => stream.write(buf), } } @@ -290,6 +294,7 @@ impl Write for SocketStream { match self { SocketStream::Unix(stream) => stream.flush(), SocketStream::Tcp(stream) => stream.flush(), + SocketStream::Tls(stream) => stream.flush(), } } } @@ -299,6 +304,7 @@ impl AsFd for SocketStream { match self { SocketStream::Unix(s) => s.as_fd(), SocketStream::Tcp(s) => s.as_fd(), + SocketStream::Tls(s) => s.as_fd(), } } } @@ -311,6 +317,7 @@ impl ReadVolatile for SocketStream { match self { SocketStream::Unix(s) => s.read_volatile(buf), SocketStream::Tcp(s) => s.read_volatile(buf), + SocketStream::Tls(s) => s.read_volatile(buf), } } @@ -321,6 +328,7 @@ impl ReadVolatile for SocketStream { match self { SocketStream::Unix(s) => s.read_exact_volatile(buf), SocketStream::Tcp(s) => s.read_exact_volatile(buf), + SocketStream::Tls(s) => s.read_exact_volatile(buf), } } } @@ -333,6 +341,7 @@ impl WriteVolatile for SocketStream { match self { SocketStream::Unix(s) => s.write_volatile(buf), SocketStream::Tcp(s) => s.write_volatile(buf), + SocketStream::Tls(s) => s.write_volatile(buf), } } @@ -343,6 +352,7 @@ impl WriteVolatile for SocketStream { match self { SocketStream::Unix(s) => s.write_all_volatile(buf), SocketStream::Tcp(s) => s.write_all_volatile(buf), + SocketStream::Tls(s) => s.write_all_volatile(buf), } } } @@ -876,6 +886,7 @@ fn wait_for_readable( enum ReceiveListener { Tcp(TcpListener), Unix(UnixListener, Option), + Tls(TcpListener, TlsConnectionWrapper), } impl AsFd for ReceiveListener { @@ -883,6 +894,7 @@ impl AsFd for ReceiveListener { match self { ReceiveListener::Tcp(listener) => listener.as_fd(), ReceiveListener::Unix(listener, _) => listener.as_fd(), + ReceiveListener::Tls(listener, _) => listener.as_fd(), } } } @@ -910,6 +922,11 @@ impl ReceiveListener { Ok(socket) } + ReceiveListener::Tls(listener, conn) => listener.accept().map(|(socket, _)| { + conn.wrap(socket) + .map(SocketStream::Tls) + .map_err(std::io::Error::other) + })?, } } @@ -929,6 +946,9 @@ impl ReceiveListener { ReceiveListener::Unix(listener, opt_path) => listener .try_clone() .map(|listener| ReceiveListener::Unix(listener, opt_path.clone())), + ReceiveListener::Tls(listener, conn) => listener + .try_clone() + .map(|listener| ReceiveListener::Tls(listener, conn.clone())), } } } @@ -2169,6 +2189,11 @@ impl Vmm { "--local option is not supported with TCP sockets", 
))); } + SocketStream::Tls(_tls_socket) => { + return Err(MigratableError::MigrateSend(anyhow!( + "--local option is not supported with TCP sockets", + ))); + } } } From d5e345b20882878440f082e43d33296283fd8847 Mon Sep 17 00:00:00 2001 From: Sebastian Eydam Date: Wed, 5 Nov 2025 16:37:56 +0100 Subject: [PATCH 278/294] vmm: use TLS encrypted live migration when TLS parameters are provided For TLS we need certificates (and a key for the TLS server). This commits adds parameters for that and encrypts the connection with TLS if the necessary parameters are provided. On-behalf-of: SAP sebastian.eydam@sap.com Signed-off-by: Sebastian Eydam --- src/bin/ch-remote.rs | 28 +++++++++++++---- vmm/src/api/mod.rs | 7 +++++ vmm/src/lib.rs | 74 ++++++++++++++++++++++++++++---------------- 3 files changed, 76 insertions(+), 33 deletions(-) diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs index 74bcd80fb7..587232be2d 100644 --- a/src/bin/ch-remote.rs +++ b/src/bin/ch-remote.rs @@ -11,6 +11,7 @@ use std::io::Read; use std::marker::PhantomData; use std::num::NonZeroU32; use std::os::unix::net::UnixStream; +use std::path::PathBuf; use std::process; use api_client::{ @@ -491,7 +492,8 @@ fn rest_api_do_command(matches: &ArgMatches, socket: &mut UnixStream) -> ApiResu .subcommand_matches("send-migration") .unwrap() .get_one::("send_migration_config") - .unwrap(), + .unwrap() + .to_owned(), matches .subcommand_matches("send-migration") .unwrap() @@ -513,6 +515,11 @@ fn rest_api_do_command(matches: &ArgMatches, socket: &mut UnixStream) -> ApiResu .copied() .and_then(NonZeroU32::new) .unwrap_or(NonZeroU32::new(1).unwrap()), + matches + .subcommand_matches("send-migration") + .unwrap() + .get_one::("tls_dir") + .cloned(), ); simple_api_command(socket, "PUT", "send-migration", Some(&send_migration_data)) .map_err(Error::HttpApiClient) @@ -523,7 +530,13 @@ fn rest_api_do_command(matches: &ArgMatches, socket: &mut UnixStream) -> ApiResu .subcommand_matches("receive-migration") .unwrap() .get_one::("receive_migration_config") - .unwrap(), + .unwrap() + .to_owned(), + matches + .subcommand_matches("receive_migration") + .unwrap() + .get_one::("tls_dir") + .cloned(), ); simple_api_command( socket, @@ -930,32 +943,35 @@ fn coredump_config(destination_url: &str) -> String { serde_json::to_string(&coredump_config).unwrap() } -fn receive_migration_data(url: &str) -> String { +fn receive_migration_data(url: String, tls_dir: Option) -> String { let receive_migration_data = vmm::api::VmReceiveMigrationData { - receiver_url: url.to_owned(), + receiver_url: url, tcp_serial_url: None, // Only FDs transmitted via an SCM_RIGHTS UNIX Domain Socket message // are valid. Transmitting specific FD nums via the HTTP API is // almost always invalid. 
net_fds: None, + tls_dir, }; serde_json::to_string(&receive_migration_data).unwrap() } fn send_migration_data( - url: &str, + url: String, local: bool, downtime: u64, migration_timeout: u64, connections: NonZeroU32, + tls_dir: Option, ) -> String { let send_migration_data = vmm::api::VmSendMigrationData { - destination_url: url.to_owned(), + destination_url: url, local, downtime, migration_timeout, connections, + tls_dir, }; serde_json::to_string(&send_migration_data).unwrap() diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index c499c74c76..73e352a11d 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -35,6 +35,7 @@ pub mod http; use std::io; use std::num::NonZeroU32; +use std::path::PathBuf; use std::sync::mpsc::{RecvError, SendError, Sender, channel}; use micro_http::Body; @@ -265,6 +266,9 @@ pub struct VmReceiveMigrationData { pub tcp_serial_url: Option, /// Map with new network FDs on the new host. pub net_fds: Option>, + /// Directory containing the TLS server certificate (server-cert.pem) and TLS server key (server-key.pem). + #[serde(default)] + pub tls_dir: Option, } #[derive(Clone, Deserialize, Serialize, Debug)] @@ -287,6 +291,9 @@ pub struct VmSendMigrationData { /// The number of parallel connections for migration #[serde(default = "default_connections")] pub connections: NonZeroU32, + /// Directory containing the TLS root CA certificate (ca-cert.pem) + #[serde(default)] + pub tls_dir: Option, } // Default value for downtime the same as qemu. diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 03a768416f..450f749319 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -21,7 +21,6 @@ use std::collections::HashMap; use std::fs::File; use std::io::{ErrorKind, Read, Write, stdout}; use std::net::{TcpListener, TcpStream}; -use std::num::NonZeroU32; use std::os::fd::{AsFd, BorrowedFd}; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::os::unix::net::{UnixListener, UnixStream}; @@ -60,7 +59,9 @@ use vm_memory::{ }; use vm_migration::protocol::*; use vm_migration::tls::{TlsConnectionWrapper, TlsStream}; -use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; +use vm_migration::{ + Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, tls, +}; use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::signal::unblock_signal; use vmm_sys_util::sock_ctrl_msg::ScmSocket; @@ -1233,15 +1234,15 @@ impl SendAdditionalConnections { const CHUNK_SIZE: u64 = 64 /* MiB */ << 20; fn new( - destination: &str, - connections: NonZeroU32, + send_data_migration: &VmSendMigrationData, guest_mem: &GuestMemoryAtomic, ) -> std::result::Result { let mut threads = Vec::new(); let mut channels = Vec::new(); - for n in 0..(connections.get() - 1) { - let socket = (match send_migration_socket(destination) { + let additional_connections = send_data_migration.connections.get() - 1; + for n in 0..(additional_connections) { + let socket = (match send_migration_socket(send_data_migration) { Err(e) if n == 0 => { // If we encounter a problem on the first additional // connection, we just assume the other side doesn't support @@ -1385,17 +1386,31 @@ impl Drop for SendAdditionalConnections { /// Establishes a connection to a migration destination socket (TCP or UNIX). 
fn send_migration_socket( - destination_url: &str, + send_data_migration: &VmSendMigrationData, ) -> std::result::Result { - if let Some(address) = destination_url.strip_prefix("tcp:") { + if let Some(address) = send_data_migration.destination_url.strip_prefix("tcp:") { info!("Connecting to TCP socket at {}", address); let socket = TcpStream::connect(address).map_err(|e| { MigratableError::MigrateSend(anyhow!("Error connecting to TCP socket: {}", e)) })?; - Ok(SocketStream::Tcp(socket)) - } else if let Some(path) = destination_url.strip_prefix("unix:") { + if send_data_migration.tls_dir.is_none() { + Ok(SocketStream::Tcp(socket)) + } else { + info!("Live Migration will be encrypted using TLS."); + // The address may still contain a port. I think we should build something more robust to also handle IPv6. + let tls_stream = tls::client_stream( + socket, + send_data_migration.tls_dir.as_ref().unwrap(), + address + .split_once(':') + .map(|(host, _)| host) + .unwrap_or(address), + )?; + Ok(SocketStream::Tls(TlsStream::Client(tls_stream))) + } + } else if let Some(path) = &send_data_migration.destination_url.strip_prefix("unix:") { info!("Connecting to UNIX socket at {:?}", path); let socket = UnixStream::connect(path).map_err(|e| { @@ -1405,22 +1420,30 @@ fn send_migration_socket( Ok(SocketStream::Unix(socket)) } else { Err(MigratableError::MigrateSend(anyhow!( - "Invalid destination: {destination_url}" + "Invalid destination: {}", + send_data_migration.destination_url ))) } } /// Creates a listener socket for receiving incoming migration connections (TCP or UNIX). fn receive_migration_listener( - receiver_url: &str, + receiver_data_migration: &VmReceiveMigrationData, ) -> std::result::Result { - if let Some(address) = receiver_url.strip_prefix("tcp:") { - TcpListener::bind(address) - .map_err(|e| { - MigratableError::MigrateReceive(anyhow!("Error binding to TCP socket: {}", e)) - }) - .map(ReceiveListener::Tcp) - } else if let Some(path) = receiver_url.strip_prefix("unix:") { + if let Some(address) = receiver_data_migration.receiver_url.strip_prefix("tcp:") { + let listener = TcpListener::bind(address).map_err(|e| { + MigratableError::MigrateReceive(anyhow!("Error binding to TCP socket: {}", e)) + })?; + + if receiver_data_migration.tls_dir.is_none() { + Ok(ReceiveListener::Tcp(listener)) + } else { + Ok(ReceiveListener::Tls( + listener, + TlsConnectionWrapper::new(receiver_data_migration.tls_dir.as_ref().unwrap()), + )) + } + } else if let Some(path) = receiver_data_migration.receiver_url.strip_prefix("unix:") { UnixListener::bind(path) .map_err(|e| { MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) @@ -1428,7 +1451,8 @@ fn receive_migration_listener( .map(|listener| ReceiveListener::Unix(listener, Some(path.into()))) } else { Err(MigratableError::MigrateSend(anyhow!( - "Invalid source: {receiver_url}" + "Invalid source: {}", + receiver_data_migration.receiver_url ))) } } @@ -2055,11 +2079,7 @@ impl Vmm { s: &mut MigrationState, send_data_migration: &VmSendMigrationData, ) -> result::Result<(), MigratableError> { - let mem_send = SendAdditionalConnections::new( - &send_data_migration.destination_url, - send_data_migration.connections, - &vm.guest_memory(), - )?; + let mem_send = SendAdditionalConnections::new(send_data_migration, &vm.guest_memory())?; // Start logging dirty pages vm.start_dirty_log()?; @@ -2140,7 +2160,7 @@ impl Vmm { let mut s = MigrationState::new(); // Set up the socket connection - let mut socket = 
send_migration_socket(&send_data_migration.destination_url)?; + let mut socket = send_migration_socket(&send_data_migration)?; // Start the migration Request::start().write_to(&mut socket)?; @@ -3319,7 +3339,7 @@ impl RequestHandler for Vmm { receive_data_migration.receiver_url, &receive_data_migration.net_fds ); - let mut listener = receive_migration_listener(&receive_data_migration.receiver_url)?; + let mut listener = receive_migration_listener(&receive_data_migration)?; // Accept the connection and get the socket let mut socket = listener.accept().map_err(|e| { warn!("Failed to accept migration connection: {}", e); From d4cb05de4d7fe1b4300ce4d6bed56ae7fc04e19b Mon Sep 17 00:00:00 2001 From: Sebastian Eydam Date: Thu, 27 Nov 2025 15:24:59 +0100 Subject: [PATCH 279/294] vm-migration: speed up ReadVolatile and WriteVolatile The ReadVolatile and WriteVolatile implementations of TlsStream were very slow, mainly because they allocated a large buffer on each invocation. The TlsStreamWrapper carries a buffer that it uses for ReadVolatile and WriteVolatile and that is allocated once on creation. On-behalf-of: SAP sebastian.eydam@sap.com Signed-off-by: Sebastian Eydam --- vm-migration/src/tls.rs | 98 ++++++++++++++++++++++++++++++++++++----- vmm/src/lib.rs | 10 +++-- 2 files changed, 92 insertions(+), 16 deletions(-) diff --git a/vm-migration/src/tls.rs b/vm-migration/src/tls.rs index 50fece3ad3..a44a76ebc8 100644 --- a/vm-migration/src/tls.rs +++ b/vm-migration/src/tls.rs @@ -46,6 +46,28 @@ pub enum TlsStream { Server(StreamOwned), } +// The TLS-Stream objects cannot read or write volatile, thus we need a buffer +// between the VolatileSlice and the TLS stream (see ReadVolatile and +// WriteVolatile implementations below). Allocating this buffer in these +// function calls would make it very slow, thus we tie the buffer to the stream +// with this wrapper. +pub struct TlsStreamWrapper { + stream: TlsStream, + // Used only in ReadVolatile and WriteVolatile + buf: Vec, +} + +static MAX_CHUNK: usize = 1024 * 64; + +impl TlsStreamWrapper { + pub fn new(stream: TlsStream) -> Self { + Self { + stream, + buf: Vec::new(), + } + } +} + impl Read for TlsStream { fn read(&mut self, buf: &mut [u8]) -> io::Result { match self { @@ -55,6 +77,12 @@ impl Read for TlsStream { } } +impl Read for TlsStreamWrapper { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + Read::read(&mut self.stream, buf) + } +} + impl Write for TlsStream { fn write(&mut self, buf: &[u8]) -> io::Result { match self { @@ -70,38 +98,81 @@ impl Write for TlsStream { } } +impl Write for TlsStreamWrapper { + fn write(&mut self, buf: &[u8]) -> io::Result { + Write::write(&mut self.stream, buf) + } + fn flush(&mut self) -> io::Result<()> { + Write::flush(&mut self.stream) + } +} + // Reading from or writing to these FDs would break the connection, because // those reads or writes wouldn't go through rustls. But the FD is used to wait // until it becomes readable. 
-impl AsFd for TlsStream { +impl AsFd for TlsStreamWrapper { fn as_fd(&self) -> BorrowedFd<'_> { - match self { + match &self.stream { TlsStream::Client(s) => s.get_ref().as_fd(), TlsStream::Server(s) => s.get_ref().as_fd(), } } } -impl ReadVolatile for TlsStream { +impl ReadVolatile for TlsStreamWrapper { fn read_volatile( &mut self, vs: &mut VolatileSlice, ) -> std::result::Result { - let mut tmp = vec![0u8; vs.len()]; - let n = Read::read(self, &mut tmp[..]).unwrap(); - vs.copy_from(&tmp[..n]); + let len = vs.len().min(MAX_CHUNK); + + if len == 0 { + return Ok(0); + } + + if self.buf.len() < len { + self.buf.resize(len, 0); + } + + let buf = &mut self.buf[..len]; + let n = + Read::read(&mut self.stream, &mut buf[..len]).map_err(VolatileMemoryError::IOError)?; + + if n == 0 { + return Ok(0); + } + + vs.copy_from(&buf[..n]); + self.buf.clear(); + Ok(n) } } -impl WriteVolatile for TlsStream { +impl WriteVolatile for TlsStreamWrapper { fn write_volatile( &mut self, vs: &VolatileSlice, ) -> std::result::Result { - let mut tmp = vec![0u8; vs.len()]; - let n = vs.copy_to(&mut tmp[..]); - let n = Write::write(self, &tmp[..n]).unwrap(); + let len = vs.len().min(MAX_CHUNK); + if len == 0 { + return Ok(0); + } + + if self.buf.len() < len { + self.buf.resize(len, 0); + } + + let buf = &mut self.buf[..len]; + let n = vs.copy_to(&mut buf[..len]); + + if n == 0 { + return Ok(0); + } + + let n = Write::write(&mut self.stream, &buf[..n]).map_err(VolatileMemoryError::IOError)?; + self.buf.clear(); + Ok(n) } } @@ -130,7 +201,10 @@ impl TlsConnectionWrapper { Self { config } } - pub fn wrap(&self, socket: TcpStream) -> std::result::Result { + pub fn wrap( + &self, + socket: TcpStream, + ) -> std::result::Result { let conn = ServerConnection::new(self.config.clone()).map_err(TlsError::RustlsError)?; let mut tls = StreamOwned::new(conn, socket); @@ -146,7 +220,7 @@ impl TlsConnectionWrapper { } } - Ok(TlsStream::Server(tls)) + Ok(TlsStreamWrapper::new(TlsStream::Server(tls))) } } diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 450f749319..aebca782b8 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -58,7 +58,7 @@ use vm_memory::{ VolatileMemoryError, VolatileSlice, WriteVolatile, }; use vm_migration::protocol::*; -use vm_migration::tls::{TlsConnectionWrapper, TlsStream}; +use vm_migration::tls::{TlsConnectionWrapper, TlsStream, TlsStreamWrapper}; use vm_migration::{ Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, tls, }; @@ -269,7 +269,7 @@ impl From for EpollDispatch { enum SocketStream { Unix(UnixStream), Tcp(TcpStream), - Tls(TlsStream), + Tls(Box), } impl Read for SocketStream { @@ -925,7 +925,7 @@ impl ReceiveListener { } ReceiveListener::Tls(listener, conn) => listener.accept().map(|(socket, _)| { conn.wrap(socket) - .map(SocketStream::Tls) + .map(|s| SocketStream::Tls(Box::new(s))) .map_err(std::io::Error::other) })?, } @@ -1408,7 +1408,9 @@ fn send_migration_socket( .map(|(host, _)| host) .unwrap_or(address), )?; - Ok(SocketStream::Tls(TlsStream::Client(tls_stream))) + Ok(SocketStream::Tls(Box::new(TlsStreamWrapper::new( + TlsStream::Client(tls_stream), + )))) } } else if let Some(path) = &send_data_migration.destination_url.strip_prefix("unix:") { info!("Connecting to UNIX socket at {:?}", path); From 0d9477d1fc8c3549c5d1feca4bb90f70608187ea Mon Sep 17 00:00:00 2001 From: Sebastian Eydam Date: Mon, 8 Dec 2025 14:41:43 +0100 Subject: [PATCH 280/294] vmm: don't panic when live migration thread encounters an error When using multiple tcp connections during live 
migration, the main thread spawns multiple worker threads to send data. When one of those workers encountered an error, the VMM would panic. With these changes worker threads will report errors to the main thread which can then stop the live migration without panicking. On-behalf-of: SAP sebastian.eydam@sap.com Signed-off-by: Sebastian Eydam --- vmm/src/lib.rs | 56 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index aebca782b8..b6a1cba271 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -27,6 +27,7 @@ use std::os::unix::net::{UnixListener, UnixStream}; use std::panic::AssertUnwindSafe; use std::path::PathBuf; use std::rc::Rc; +use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::mpsc::{Receiver, RecvError, SendError, Sender, TrySendError}; use std::sync::{Arc, Barrier, Mutex}; use std::thread::JoinHandle; @@ -1187,6 +1188,13 @@ struct SendAdditionalConnections { guest_memory: GuestMemoryAtomic, threads: Vec>, channels: Vec>, + // If an error occurs in one of the worker threads, the worker signals this + // using this flag. Only the main thread checks this variable, the other + // workers will be stopped in the destructor. + cancel: Arc, + // The first worker encountering an error will transmit the error using + // this channel. + error_rx: std::sync::mpsc::Receiver, } /// Send memory from the given table. @@ -1199,7 +1207,7 @@ fn vm_send_memory( return Ok(()); } - Request::memory(table.length()).write_to(socket).unwrap(); + Request::memory(table.length()).write_to(socket)?; table.write_to(socket)?; // And then the memory itself send_memory_regions(guest_memory, table, socket)?; @@ -1239,6 +1247,8 @@ impl SendAdditionalConnections { ) -> std::result::Result { let mut threads = Vec::new(); let mut channels = Vec::new(); + let cancel = Arc::new(AtomicBool::new(false)); + let (error_tx, error_rx) = std::sync::mpsc::channel::(); let additional_connections = send_data_migration.connections.get() - 1; for n in 0..(additional_connections) { @@ -1258,6 +1268,8 @@ impl SendAdditionalConnections { let (send, recv) = std::sync::mpsc::sync_channel::( Self::BUFFERED_REQUESTS_PER_THREAD, ); + let cancel = cancel.clone(); + let err_tx = error_tx.clone(); let thread = thread::spawn(move || { info!("Spawned thread to send VM memory."); @@ -1268,9 +1280,27 @@ impl SendAdditionalConnections { for msg in recv { match msg { SendMemoryThreadMessage::Memory(table) => { - vm_send_memory(&guest_mem, &mut socket, &table).unwrap(); - total_sent += - table.ranges().iter().map(|range| range.length).sum::(); + match vm_send_memory(&guest_mem, &mut socket, &table) { + Ok(()) => { + total_sent += table + .ranges() + .iter() + .map(|range| range.length) + .sum::(); + } + Err(e) => { + // Only the first thread that encounters an + // error sends it to the main thread. + if cancel.swap(true, Ordering::AcqRel) + && let Err(e) = err_tx.send(e) + { + error!("Could not send error to main thread: {e}"); + } + // After that we exit gracefully. Note that + // this also closes our mpsc channel. + break; + } + }; } SendMemoryThreadMessage::Barrier(barrier) => { barrier.wait(); @@ -1291,6 +1321,8 @@ impl SendAdditionalConnections { guest_memory: guest_mem.clone(), threads, channels, + cancel, + error_rx, }) } @@ -1339,6 +1371,11 @@ impl SendAdditionalConnections { // The chunk size is chosen to be big enough so that even very fast // links need some milliseconds to send it. 
'next_partition: for chunk in table.partition(Self::CHUNK_SIZE) { + // If one of the workers encountered an error, we return it. + if self.cancel.load(Ordering::Acquire) { + return Err(self.error_rx.recv().unwrap()); + } + let chunk = Arc::new(chunk); // Find the first free channel and send the chunk via it. @@ -1372,9 +1409,14 @@ impl SendAdditionalConnections { impl Drop for SendAdditionalConnections { fn drop(&mut self) { info!("Sending disconnect message to channels"); - self.channels - .drain(..) - .for_each(|channel| channel.send(SendMemoryThreadMessage::Disconnect).unwrap()); + self.channels.drain(..).for_each(|channel| { + // One of the workers may have died and thus closed the channel. + // Thus we cannot simply do send().unwrap(). + let e = channel.send(SendMemoryThreadMessage::Disconnect); + if let Err(e) = e { + error!("Could not send disconnect message to worker thread: {e}"); + } + }); info!("Waiting for threads to finish"); self.threads From cd4ae48a384249d0f74edcbc84674f32eefc43bd Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 9 Dec 2025 10:07:50 +0100 Subject: [PATCH 281/294] ch-remote: remove missing `--tls-dir` option to migration commands This was missing. We tried to access the option from the parsed args but it is not yet known to clap that clap should parse these options. Follow-up of d5e345b20882878440f082e43d33296283fd8847. Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- src/bin/ch-remote.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs index 587232be2d..5bb4fc2e44 100644 --- a/src/bin/ch-remote.rs +++ b/src/bin/ch-remote.rs @@ -1085,6 +1085,12 @@ fn get_cli_commands_sorted() -> Box<[Command]> { .index(1) // Live migration with net_fds not supported in ch-remote. .help(""), + ) + .arg( + Arg::new("tls-dir") + .long("tls-dir") + .help("directory with TLS certificates") + .num_args(1), ), Command::new("remove-device") .about("Remove VFIO and PCI device") @@ -1183,6 +1189,12 @@ fn get_cli_commands_sorted() -> Box<[Command]> { .long("local") .num_args(0) .action(ArgAction::SetTrue), + ) + .arg( + Arg::new("tls-dir") + .long("tls-dir") + .help("directory with TLS certificates") + .num_args(1), ), Command::new("shutdown").about("Shutdown the VM"), Command::new("shutdown-vmm").about("Shutdown the VMM"), From dc66add97a5271b6c6ca9ab4281c71c7fdad8b0a Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Tue, 9 Dec 2025 10:08:52 +0100 Subject: [PATCH 282/294] ch-remote: tls_dir -> tls-dir Rename to the same name that the argument parser uses. This is also consistent with the rest of the CLI, where underscores are unusual. 
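For illustration only, the mismatch fixed here comes down to clap requiring the id
used for lookup to match the id given to Arg::new exactly; a minimal, hypothetical
example (names made up, clap v4):

    use clap::{Arg, Command};

    fn main() {
        let matches = Command::new("demo")
            .arg(Arg::new("tls-dir").long("tls-dir").num_args(1))
            .get_matches_from(["demo", "--tls-dir", "/etc/migration-certs"]);

        // Lookups must use the id passed to Arg::new, i.e. "tls-dir" here;
        // asking for "tls_dir" refers to an argument clap never defined.
        let dir: Option<&String> = matches.get_one::<String>("tls-dir");
        println!("{dir:?}");
    }
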
Signed-off-by: Philipp Schuster On-behalf-of: SAP philipp.schuster@sap.com --- src/bin/ch-remote.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs index 5bb4fc2e44..9e042ebe5b 100644 --- a/src/bin/ch-remote.rs +++ b/src/bin/ch-remote.rs @@ -518,7 +518,7 @@ fn rest_api_do_command(matches: &ArgMatches, socket: &mut UnixStream) -> ApiResu matches .subcommand_matches("send-migration") .unwrap() - .get_one::("tls_dir") + .get_one::("tls-dir") .cloned(), ); simple_api_command(socket, "PUT", "send-migration", Some(&send_migration_data)) @@ -533,9 +533,9 @@ fn rest_api_do_command(matches: &ArgMatches, socket: &mut UnixStream) -> ApiResu .unwrap() .to_owned(), matches - .subcommand_matches("receive_migration") + .subcommand_matches("receive-migration") .unwrap() - .get_one::("tls_dir") + .get_one::("tls-dir") .cloned(), ); simple_api_command( From 651b008e838f5208e84d9aaece665ab54484ce2a Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Tue, 9 Dec 2025 14:20:43 +0100 Subject: [PATCH 283/294] vmm: Enable AMX states prior to checking CPUID compatibility Since enabling AMX tile state components affect the result returned by `Hypervisor::get_supported_cpuid` we want this enabled prior to checking CPUID compatibility between the source and destination VMs. Although this is not required today, it is necessary in order for the upcoming CPU profiles correctly, and it will also be necessary once the check_cpuid_compatibility checks are extended to take state components into account. Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- vmm/src/lib.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index b6a1cba271..7497d3cc2d 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -35,7 +35,7 @@ use std::thread::JoinHandle; use std::time::{Duration, Instant}; use std::{io, mem, result, thread}; -use anyhow::anyhow; +use anyhow::{Context, anyhow}; #[cfg(feature = "dbus_api")] use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; use api::http::HttpApiHandle; @@ -2354,6 +2354,16 @@ impl Vmm { let dest_cpuid = &{ let vm_config = &src_vm_config.lock().unwrap(); + if vm_config.cpus.features.amx { + // Need to enable AMX tile state components before generating common cpuid + // as this affects what Hypervisor::get_supported_cpuid returns. 
+ hypervisor::arch::x86::XsaveState::enable_amx_state_components( + self.hypervisor.as_ref(), + ) + .context("Unable to enable AMX before generating common CPUID") + .map_err(MigratableError::MigrateReceive)?; + } + let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits); arch::generate_common_cpuid( &self.hypervisor.clone(), From 0eb0dcf870660b4f06024856811049ee22c6b1e6 Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Tue, 9 Dec 2025 18:23:11 +0100 Subject: [PATCH 284/294] hypervisor: Permit enabling AMX tile state components more than once Temporary workaround until we switch over to the WIP fix upstream Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- hypervisor/src/arch/x86/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hypervisor/src/arch/x86/mod.rs b/hypervisor/src/arch/x86/mod.rs index 56d1e98a24..f81734f0a9 100644 --- a/hypervisor/src/arch/x86/mod.rs +++ b/hypervisor/src/arch/x86/mod.rs @@ -396,9 +396,7 @@ impl XsaveState { ((size as usize) - size_of::()) .div_ceil(size_of::()) }; - XSAVE_FAM_LENGTH - .set(fam_length) - .expect("This should only be set once"); + let _ = XSAVE_FAM_LENGTH.set(fam_length); } Ok(()) From a77f6c525a8d932c31bcafbd20172aa78b1e27ac Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Tue, 9 Dec 2025 10:44:05 +0100 Subject: [PATCH 285/294] arch: Initial data structures for describing CPUID parameters These data structures are required to define CPU profiles. Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- Cargo.lock | 69 ++++++++++++ arch/Cargo.toml | 5 + arch/src/x86_64/cpuid_definitions/mod.rs | 136 +++++++++++++++++++++++ arch/src/x86_64/mod.rs | 4 +- 4 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 arch/src/x86_64/cpuid_definitions/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 92b01e42c1..b96aead2cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,7 +114,9 @@ dependencies = [ "libc", "linux-loader", "log", + "proptest", "serde", + "serde_json", "thiserror 2.0.12", "uuid", "vm-fdt", @@ -322,6 +324,21 @@ dependencies = [ "syn", ] +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitfield-struct" version = "0.10.1" @@ -1809,6 +1826,31 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proptest" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee689443a2bd0a16ab0348b52ee43e3b2d1b1f931c8aa5c9f8de4c86fbe8c40" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags 2.9.4", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.40" @@ -1853,6 +1895,15 @@ dependencies = [ "getrandom 0.3.3", ] +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core", +] + [[package]] name = "range_map_vec" version = "0.2.0" @@ -2030,6 +2081,18 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "rusty-fork" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + [[package]] name = "ryu" version = "1.0.20" @@ -2372,6 +2435,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "unicode-ident" version = "1.0.18" diff --git a/arch/Cargo.toml b/arch/Cargo.toml index 804be793d0..3e82367707 100644 --- a/arch/Cargo.toml +++ b/arch/Cargo.toml @@ -28,5 +28,10 @@ vmm-sys-util = { workspace = true, features = ["with-serde"] } fdt_parser = { version = "0.1.5", package = "fdt" } vm-fdt = { workspace = true } +# Use this to test our custom serialization logic +[dev-dependencies] +proptest = "1.0.0" +serde_json = { workspace = true } + [lints] workspace = true diff --git a/arch/src/x86_64/cpuid_definitions/mod.rs b/arch/src/x86_64/cpuid_definitions/mod.rs new file mode 100644 index 0000000000..a5b39dbe03 --- /dev/null +++ b/arch/src/x86_64/cpuid_definitions/mod.rs @@ -0,0 +1,136 @@ +// Copyright © 2025 Cyberus Technology GmbH +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::io::Write; +use std::ops::RangeInclusive; + +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +use crate::x86_64::CpuidReg; + +pub(in crate::x86_64) fn serialize_as_hex( + input: &u32, + serializer: S, +) -> Result { + // two bytes for "0x" prefix and eight for the hex encoded number + let mut buffer = [0_u8; 10]; + let _ = write!(&mut buffer[..], "{:#010x}", input); + let str = core::str::from_utf8(&buffer[..]) + .expect("the buffer should be filled with valid UTF-8 bytes"); + serializer.serialize_str(str) +} + +pub(in crate::x86_64) fn deserialize_from_hex<'de, D: Deserializer<'de>>( + deserializer: D, +) -> Result { + let hex = <&'de str as Deserialize>::deserialize(deserializer)?; + u32::from_str_radix(hex.strip_prefix("0x").unwrap_or(""), 16).map_err(|_| { + ::custom(format!("{hex} is not a hex encoded 32 bit integer")) + }) +} + +/// Parameters for inspecting CPUID definitions. +#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] +pub struct Parameters { + // The leaf (EAX) parameter used with the CPUID instruction + #[serde(serialize_with = "serialize_as_hex")] + #[serde(deserialize_with = "deserialize_from_hex")] + pub leaf: u32, + // The sub-leaf (ECX) parameter used with the CPUID instruction + pub sub_leaf: RangeInclusive, + // The register we are interested in inspecting which gets filled by the CPUID instruction + pub register: CpuidReg, +} + +#[cfg(test)] +mod tests { + use proptest::prelude::*; + use serde::Deserialize; + + use super::{Parameters, deserialize_from_hex, serialize_as_hex}; + use crate::x86_64::CpuidReg; + + /* + Check that the leaves get the string representation we expect. 
+ This does not really matter from a functionality point of view, but we want + to read it in the expected format when manually viewing the generated CPU + profile files. + + Also assert that deserialization gives the original value back + */ + #[test] + fn hex_serialization() { + for (leaf, expected) in [ + 0x0_u32, 0x7, 0xd, 0x1e, 0x40000000, 0x4fffffff, 0x80000000, 0x8fffffff, + ] + .into_iter() + .zip([ + "0x00000000", + "0x00000007", + "0x0000000d", + "0x0000001e", + "0x40000000", + "0x4fffffff", + "0x80000000", + "0x8fffffff", + ]) { + let mut v = Vec::new(); + let mut serializer = serde_json::Serializer::new(&mut v); + serialize_as_hex(&leaf, &mut serializer).unwrap(); + let serialized = str::from_utf8(&v[..]).unwrap(); + // JSON Strings have surrounding "" hence we trim that + let serialized_trimmed = serialized + .strip_prefix('"') + .unwrap() + .strip_suffix('"') + .unwrap(); + dbg!(serialized_trimmed); + assert_eq!(serialized_trimmed, expected); + // Also check that we can deserialize this back to the original value + let mut deserializer = serde_json::Deserializer::from_str(serialized); + let deserialized = deserialize_from_hex(&mut deserializer).unwrap(); + assert_eq!(deserialized, leaf); + } + } + + // Check that serializing and then deserializing a value of type `Parameter` results in the + // same value we started with. + proptest! { + #[test] + fn parameter_serialization_roundtrip_works(leaf in 0u32..u32::MAX, x1 in 0u32..100, x2 in 0u32..100, reg in 0..4) { + let sub_leaf_range_start = std::cmp::min(x1, x2); + let sub_leaf_range_end = std::cmp::max(x1,x2); + let sub_leaf = sub_leaf_range_start..=sub_leaf_range_end; + let register = match reg { + 0 => CpuidReg::EAX, + 1 => CpuidReg::EBX, + 2 => CpuidReg::ECX, + 3 => CpuidReg::EDX, + _ => unreachable!() + }; + let cpuid_parameters = Parameters { + leaf, + sub_leaf, + register + }; + let serialized = serde_json::to_string(&cpuid_parameters).unwrap(); + let deserialized: Parameters = serde_json::from_str(&serialized).unwrap(); + prop_assert_eq!(&deserialized, &cpuid_parameters); + } + } + + // Check that `deserialize_from_hex` does not succeed if the stringified u32 does not start with 0x + proptest! { + #[test] + fn hex_deserialization_requires_prefix(leaf in any::().prop_map(|leaf| std::iter::once('"').chain(leaf.to_string().chars()).chain(std::iter::once('"')).collect::())) { + let mut deserializer = serde_json::Deserializer::from_str(leaf.as_str()); + // Check that standard deserialization works + let result = ::deserialize(&mut deserializer); + prop_assert!(result.is_ok()); + let mut deserializer = serde_json::Deserializer::from_str(leaf.as_str()); + prop_assert!(deserialize_from_hex(&mut deserializer).is_err()); + } + } +} diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index 648220e070..7edb0d03ab 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -7,6 +7,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE-BSD-3-Clause file. 
use std::sync::Arc; +pub mod cpuid_definitions; pub mod interrupts; pub mod layout; mod mpspec; @@ -20,6 +21,7 @@ use linux_loader::loader::bootparam::{boot_params, setup_header}; use linux_loader::loader::elf::start_info::{ hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info, }; +use serde::{Deserialize, Serialize}; use thiserror::Error; use vm_memory::{ Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic, @@ -181,7 +183,7 @@ pub fn get_max_x2apic_id(topology: (u16, u16, u16, u16)) -> u32 { ) } -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub enum CpuidReg { EAX, EBX, From b338de5e6f24266ed082f4f6586e0072570f17df Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Tue, 7 Oct 2025 04:39:38 +0200 Subject: [PATCH 286/294] hypervisor: Implement common traits for HypervisorType and CpuVendor We want CPU profiles to keep a record of the hypervisor type and cpu vendor that they are intended to work with. This is made more convenient if all of these types implement common traits (used for serialization). Signed-Off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- hypervisor/src/cpu.rs | 2 +- hypervisor/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hypervisor/src/cpu.rs b/hypervisor/src/cpu.rs index 519b99f567..bfd24f12e8 100644 --- a/hypervisor/src/cpu.rs +++ b/hypervisor/src/cpu.rs @@ -30,7 +30,7 @@ use crate::kvm::{TdxExitDetails, TdxExitStatus}; use crate::{CpuState, MpState, StandardRegisters}; #[cfg(target_arch = "x86_64")] -#[derive(Copy, Clone, Default)] +#[derive(Debug, Copy, Clone, Default, serde::Serialize, serde::Deserialize, Eq, PartialEq)] pub enum CpuVendor { #[default] Unknown, diff --git a/hypervisor/src/lib.rs b/hypervisor/src/lib.rs index 205691a421..2e653708c5 100644 --- a/hypervisor/src/lib.rs +++ b/hypervisor/src/lib.rs @@ -69,7 +69,7 @@ pub use vm::{ pub use crate::hypervisor::{Hypervisor, HypervisorError}; -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)] pub enum HypervisorType { #[cfg(feature = "kvm")] Kvm, From f62dc8de9125fe464fe1b6c163fcacd11c22804c Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Tue, 9 Dec 2025 13:27:00 +0100 Subject: [PATCH 287/294] arch: CpuProfile data structures We introduce essential data structures together with basic functionality that is necessary to apply a CPU profile to a host. Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- arch/src/lib.rs | 30 ++++ arch/src/x86_64/cpu_profile.rs | 246 +++++++++++++++++++++++++++++++++ arch/src/x86_64/mod.rs | 1 + 3 files changed, 277 insertions(+) create mode 100644 arch/src/x86_64/cpu_profile.rs diff --git a/arch/src/lib.rs b/arch/src/lib.rs index 36fa20f13c..2a298d0ba3 100644 --- a/arch/src/lib.rs +++ b/arch/src/lib.rs @@ -12,12 +12,17 @@ extern crate log; use std::collections::BTreeMap; +use std::str::FromStr; use std::sync::Arc; use std::{fmt, result}; +use serde::de::IntoDeserializer; use serde::{Deserialize, Serialize}; use thiserror::Error; +#[cfg(target_arch = "x86_64")] +pub use crate::x86_64::cpu_profile::CpuProfile; + type GuestMemoryMmap = vm_memory::GuestMemoryMmap; type GuestRegionMmap = vm_memory::GuestRegionMmap; @@ -56,6 +61,31 @@ pub enum Error { /// Type for returning public functions outcome. pub type Result = result::Result; +// If the target_arch is x86_64 we import CpuProfile from the x86_64 module, otherwise we +// declare it here. 
+#[cfg(not(target_arch = "x86_64"))] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "kebab-case")] +/// A [`CpuProfile`] is a mechanism for ensuring live migration compatibility +/// between host's with potentially different CPU models. +pub enum CpuProfile { + #[default] + Host, +} + +impl FromStr for CpuProfile { + type Err = serde::de::value::Error; + fn from_str(s: &str) -> result::Result { + // Should accept both plain strings, and strings surrounded by `"`. + let normalized = s + .strip_prefix('"') + .unwrap_or(s) + .strip_suffix('"') + .unwrap_or(s); + Self::deserialize(normalized.into_deserializer()) + } +} + /// Type for memory region types. #[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize, Deserialize)] pub enum RegionType { diff --git a/arch/src/x86_64/cpu_profile.rs b/arch/src/x86_64/cpu_profile.rs new file mode 100644 index 0000000000..bf5d90f061 --- /dev/null +++ b/arch/src/x86_64/cpu_profile.rs @@ -0,0 +1,246 @@ +// Copyright © 2025 Cyberus Technology GmbH +// +// SPDX-License-Identifier: Apache-2.0 +// + +use hypervisor::arch::x86::CpuIdEntry; +use hypervisor::{CpuVendor, HypervisorType}; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +use crate::x86_64::CpuidReg; +use crate::x86_64::cpuid_definitions::{Parameters, deserialize_from_hex, serialize_as_hex}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +#[serde(rename_all = "kebab-case")] +/// A [`CpuProfile`] is a mechanism for ensuring live migration compatibility +/// between host's with potentially different CPU models. +pub enum CpuProfile { + #[default] + Host, + Skylake, + SapphireRapids, +} + +impl CpuProfile { + /// Loads pre-generated data associated with a CPU profile. + /// + /// If the `amx` flag is false then the AMX tile state components will be + /// zeroed out from the associated profile data. This is necessary because + /// they will then not be present in the vector of [`CpuidEntry`] values + /// obtained from the hypervisor. + // + // We can only generate CPU profiles for the KVM hypervisor for the time being. + #[cfg(feature = "kvm")] + pub(in crate::x86_64) fn data(&self, amx: bool) -> Option { + let mut data: CpuProfileData = match self { + Self::Host => None, + Self::Skylake => todo!(), + Self::SapphireRapids => todo!(), + }?; + + if !amx { + // In this case we will need to wipe out the AMX tile state components (if they are included in the profile) + for adj in data.adjustments.iter_mut() { + if adj.0.sub_leaf.start() != adj.0.sub_leaf.end() { + // The generated profiles produce as many sub-leaf entries as possible, and only use ranges for + // values not found. + continue; + } + let sub_leaf = *adj.0.sub_leaf.start(); + let leaf = adj.0.leaf; + if (leaf == 0xd) && (sub_leaf == 0) && (adj.0.register == CpuidReg::EAX) { + adj.1.replacements &= !((1 << 17) | (1 << 18)); + } + + if (leaf == 0xd) && (sub_leaf == 1) && (adj.0.register == CpuidReg::ECX) { + adj.1.replacements &= !((1 << 17) | (1 << 18)); + } + + if (leaf == 0xd) && ((sub_leaf == 17) | (sub_leaf == 18)) { + adj.1.replacements = 0; + } + } + } + + Some(data) + } + + #[cfg(not(feature = "kvm"))] + pub(in crate::x86_64) fn data(&self, _amx: bool) -> Option { + if matches!(*self, Self::Host) { + return None; + } + // This will need to be addressed before upstreaming. + // We will probably need one profile per hypervisor. 
+ unimplemented!() + } +} + +/// Every [`CpuProfile`] different from `Host` has associated [`CpuProfileData`]. +/// +/// New constructors of this struct may only be generated through the CHV CLI (when built from source with +/// the `cpu-profile-generation` feature) which other hosts may then attempt to load in order to +/// increase the likelihood of successful live migrations among all hosts that opted in to the given +/// CPU profile. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[allow(dead_code)] +pub struct CpuProfileData { + /// The hypervisor used when generating this CPU profile. + pub(in crate::x86_64) hypervisor: HypervisorType, + /// The vendor of the CPU belonging to the host that generated this CPU profile. + pub(in crate::x86_64) cpu_vendor: CpuVendor, + /// Adjustments necessary to become compatible with the desired target. + pub(in crate::x86_64) adjustments: Vec<(Parameters, CpuidOutputRegisterAdjustments)>, +} + +/* TODO: The [`CpuProfile`] struct will likely need a few more iterations. The following +section should explain why: + +# MSR restrictions + +CPU profiles also need to restrict which MSRs may be manipulated by the guest as various physical CPUs +can have differing supported MSRs. + +The CPU profile will thus necessarily need to contain some data related to MSR restrictions. That will +be taken care of in a follow up MR. + +*/ + +/// Used for adjusting an entire cpuid output register (EAX, EBX, ECX or EDX) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub(super) struct CpuidOutputRegisterAdjustments { + #[serde(serialize_with = "serialize_as_hex")] + #[serde(deserialize_with = "deserialize_from_hex")] + pub(in crate::x86_64) replacements: u32, + /// Used to zero out the area `replacements` occupy. This mask is not necessarily !replacements, as replacements may pack values of different types (i.e. it is wrong to think of it as a bitset conceptually speaking). + #[serde(serialize_with = "serialize_as_hex")] + #[serde(deserialize_with = "deserialize_from_hex")] + pub(in crate::x86_64) mask: u32, +} +impl CpuidOutputRegisterAdjustments { + pub(in crate::x86_64) fn adjust(self, cpuid_output_register: &mut u32) { + let temp_register_copy = *cpuid_output_register; + let replacements_area_masked_in_temp_copy = temp_register_copy & self.mask; + *cpuid_output_register = replacements_area_masked_in_temp_copy | self.replacements; + } + + pub(in crate::x86_64) fn adjust_cpuid_entries( + mut cpuid: Vec, + adjustments: &[(Parameters, Self)], + ) -> Result, MissingCpuidEntriesError> { + for entry in &mut cpuid { + for (reg, reg_value) in [ + (CpuidReg::EAX, &mut entry.eax), + (CpuidReg::EBX, &mut entry.ebx), + (CpuidReg::ECX, &mut entry.ecx), + (CpuidReg::EDX, &mut entry.edx), + ] { + // Get the adjustment corresponding to the entry's function/leaf and index/sub-leaf for each of the register. If no such + // adjustment is found we use the trivial adjustment (leading to the register being zeroed out entirely). 
+ let adjustment = adjustments + .iter() + .find_map(|(param, adjustment)| { + ((param.leaf == entry.function) + & param.sub_leaf.contains(&entry.index) + & (param.register == reg)) + .then_some(*adjustment) + }) + .unwrap_or(CpuidOutputRegisterAdjustments { + mask: 0, + replacements: 0, + }); + adjustment.adjust(reg_value); + } + } + + Self::expected_entries_found(&cpuid, adjustments).map(|_| cpuid) + } + + /// Check that we found every value that was supposed to be replaced with something else than 0 + /// + /// IMPORTANT: This function assumes that the given `cpuid` has already been adjusted with the + /// provided `adjustments`. + fn expected_entries_found( + cpuid: &[CpuIdEntry], + adjustments: &[(Parameters, Self)], + ) -> Result<(), MissingCpuidEntriesError> { + let mut missing_entry = false; + + // Invalid state components can be ignored. The next few lines obtain the relevant entries to + // check for this. + let eax_0xd_0 = cpuid + .iter() + .find(|entry| (entry.function == 0xd) && (entry.index == 0)) + .map(|entry| entry.eax) + .unwrap_or(0); + let ecx_0xd_1 = cpuid + .iter() + .find(|entry| (entry.function == 0xd) && (entry.index == 1)) + .map(|entry| entry.ecx) + .unwrap_or(0); + + let edx_0xd_0 = cpuid + .iter() + .find(|entry| (entry.function == 0xd) && (entry.index == 0)) + .map(|entry| entry.edx) + .unwrap_or(0); + let edx_0xd_1 = cpuid + .iter() + .find(|entry| (entry.function == 0xd) && (entry.index == 1)) + .map(|entry| entry.edx) + .unwrap_or(0); + + for (param, adjustment) in adjustments { + if adjustment.replacements == 0 { + continue; + } + let sub_start = *param.sub_leaf.start(); + let sub_end = *param.sub_leaf.end(); + + let can_skip_lo = if (param.leaf == 0xd) && (2..32).contains(&sub_start) { + let start = sub_start; + let end = std::cmp::min(sub_end, 31); + let mask = (start..=end).fold(0, |acc, next| acc | (1 << next)); + ((mask & eax_0xd_0) == 0) & ((mask & ecx_0xd_1) == 0) + } else { + false + }; + + let can_skip_hi = if (param.leaf == 0xd) && (32..64).contains(&sub_end) { + let start = std::cmp::max(32, sub_start); + let end = sub_end; + let mask = (start..=end) + .map(|val| val - 32) + .fold(0, |acc, next| acc | (1 << next)); + ((mask & edx_0xd_0) == 0) & ((mask & edx_0xd_1) == 0) + } else { + false + }; + + if can_skip_lo && can_skip_hi { + // This means that all state components referred to by the specified sub-leaf range are not valid + // and may be skipped. + continue; + } + if !cpuid.iter().any(|entry| { + (entry.function == param.leaf) && (param.sub_leaf.contains(&entry.index)) + }) { + error!( + "cannot adjust CPU profile. No entry found matching the required parameters: {:?}", + param + ); + missing_entry = true; + } + } + if missing_entry { + Err(MissingCpuidEntriesError) + } else { + Ok(()) + } + } +} + +#[derive(Debug, Error)] +#[error("Required CPUID entries not found")] +pub(in crate::x86_64) struct MissingCpuidEntriesError; diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index 7edb0d03ab..c8d7d49f2a 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -7,6 +7,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE-BSD-3-Clause file. 
use std::sync::Arc; +pub mod cpu_profile; pub mod cpuid_definitions; pub mod interrupts; pub mod layout; From 1ceda9d79b351951ffd43e91696e3df6262a4d1d Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Tue, 7 Oct 2025 05:34:22 +0200 Subject: [PATCH 288/294] misc: Make CPU profile part of various configs We integrate the CPU profile into the various configs that ultimately get set by the user. This quickly ends up involving multiple files, luckily Rust helps us find which ones via compilation errors. Signed-Off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- arch/src/x86_64/mod.rs | 3 ++- src/main.rs | 1 + vmm/src/config.rs | 9 +++++++++ vmm/src/cpu.rs | 1 + vmm/src/lib.rs | 21 +++++++++++++++++---- vmm/src/vm.rs | 15 +++++++++------ vmm/src/vm_config.rs | 4 ++++ 7 files changed, 43 insertions(+), 11 deletions(-) diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index c8d7d49f2a..7ee535472d 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -29,7 +29,7 @@ use vm_memory::{ GuestMemoryRegion, }; -use crate::{GuestMemoryMmap, InitramfsConfig, RegionType}; +use crate::{CpuProfile, GuestMemoryMmap, InitramfsConfig, RegionType}; mod smbios; use std::arch::x86_64; #[cfg(feature = "tdx")] @@ -87,6 +87,7 @@ pub struct CpuidConfig { #[cfg(feature = "tdx")] pub tdx: bool, pub amx: bool, + pub profile: CpuProfile, } #[derive(Debug, Error)] diff --git a/src/main.rs b/src/main.rs index 87a13805a9..9436b3e160 100644 --- a/src/main.rs +++ b/src/main.rs @@ -961,6 +961,7 @@ mod unit_tests { max_phys_bits: 46, affinity: None, features: CpuFeatures::default(), + profile: Default::default(), }, memory: MemoryConfig { size: 536_870_912, diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 2ca2505814..815ad7a7ea 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -11,6 +11,7 @@ use std::path::PathBuf; use std::result; use std::str::FromStr; +use arch::CpuProfile; use clap::ArgMatches; use option_parser::{ ByteSized, IntegerList, OptionParser, OptionParserError, StringList, Toggle, Tuple, @@ -600,6 +601,7 @@ impl CpusConfig { .add("kvm_hyperv") .add("max_phys_bits") .add("affinity") + .add("profile") .add("features"); parser.parse(cpus).map_err(Error::ParseCpus)?; @@ -632,6 +634,12 @@ impl CpusConfig { }) .collect() }); + + let profile = parser + .convert::("profile") + .map_err(Error::ParseCpus)? + .unwrap_or_default(); + let features_list = parser .convert::("features") .map_err(Error::ParseCpus)? @@ -663,6 +671,7 @@ impl CpusConfig { max_phys_bits, affinity, features, + profile, }) } } diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index ebdfbf478c..416bce5d45 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -812,6 +812,7 @@ impl CpuManager { #[cfg(feature = "tdx")] tdx, amx: self.config.features.amx, + profile: self.config.profile, }, ) .map_err(Error::CommonCpuId)? 
diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 7497d3cc2d..3d92c74a8c 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -2224,17 +2224,26 @@ impl Vmm { ))); }; - let amx = vm_config.lock().unwrap().cpus.features.amx; - let phys_bits = - vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits); + let (amx, phys_bits, profile, kvm_hyperv) = { + let guard = vm_config.lock().unwrap(); + let amx = guard.cpus.features.amx; + let max_phys_bits = guard.cpus.max_phys_bits; + let profile = guard.cpus.profile; + let kvm_hyperv = guard.cpus.kvm_hyperv; + // Drop lock before function call + core::mem::drop(guard); + let phys_bits = vm::physical_bits(&hypervisor, max_phys_bits); + (amx, phys_bits, profile, kvm_hyperv) + }; arch::generate_common_cpuid( &hypervisor, &arch::CpuidConfig { phys_bits, - kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv, + kvm_hyperv, #[cfg(feature = "tdx")] tdx: false, amx, + profile, }, ) .map_err(|e| { @@ -2373,6 +2382,7 @@ impl Vmm { #[cfg(feature = "tdx")] tdx: false, amx: vm_config.cpus.features.amx, + profile: vm_config.cpus.profile, }, ) .map_err(|e| { @@ -3500,6 +3510,8 @@ const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager"; #[cfg(test)] mod unit_tests { + use arch::CpuProfile; + use super::*; #[cfg(target_arch = "x86_64")] use crate::vm_config::DebugConsoleConfig; @@ -3533,6 +3545,7 @@ mod unit_tests { max_phys_bits: 46, affinity: None, features: CpuFeatures::default(), + profile: CpuProfile::default(), }, memory: MemoryConfig { size: 536_870_912, diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index a776d0d943..aa981e226b 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -2893,19 +2893,22 @@ impl Snapshottable for Vm { #[cfg(all(feature = "kvm", target_arch = "x86_64"))] let common_cpuid = { - let amx = self.config.lock().unwrap().cpus.features.amx; - let phys_bits = physical_bits( - &self.hypervisor, - self.config.lock().unwrap().cpus.max_phys_bits, - ); + let guard = self.config.lock().unwrap(); + let amx = guard.cpus.features.amx; + let phys_bits = physical_bits(&self.hypervisor, guard.cpus.max_phys_bits); + let kvm_hyperv = guard.cpus.kvm_hyperv; + let profile = guard.cpus.profile; + // Drop the guard before function call + core::mem::drop(guard); arch::generate_common_cpuid( &self.hypervisor, &arch::CpuidConfig { phys_bits, - kvm_hyperv: self.config.lock().unwrap().cpus.kvm_hyperv, + kvm_hyperv, #[cfg(feature = "tdx")] tdx: false, amx, + profile, }, ) .map_err(|e| { diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index 45bd382b9c..7238a7ca23 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -8,6 +8,7 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use std::{fs, result}; +use arch::CpuProfile; use net_util::MacAddr; use serde::{Deserialize, Serialize}; use thiserror::Error; @@ -68,6 +69,8 @@ pub struct CpusConfig { pub affinity: Option>, #[serde(default)] pub features: CpuFeatures, + #[serde(default)] + pub profile: CpuProfile, } pub const DEFAULT_VCPUS: u32 = 1; @@ -82,6 +85,7 @@ impl Default for CpusConfig { max_phys_bits: DEFAULT_MAX_PHYS_BITS, affinity: None, features: CpuFeatures::default(), + profile: CpuProfile::default(), } } } From a9bda23149c9128cad99f63d9e8d6e858abf3700 Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Tue, 9 Dec 2025 16:57:31 +0100 Subject: [PATCH 289/294] arch: Apply CPU profile (if any) when generating common CPUID If a CPU profile is configured it should result in guests seeing a restricted subset of CPUID. This is what we finally achieve in this commit. 
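Concretely, each adjustment rewrites one CPUID output register as
(register & mask) | replacements, which is the per-register rule implemented by
CpuidOutputRegisterAdjustments::adjust. A tiny stand-alone illustration with
made-up values:

    // Stand-alone illustration of the per-register adjustment rule
    // (the concrete mask/replacement values below are invented).
    fn adjust(register: &mut u32, mask: u32, replacements: u32) {
        *register = (*register & mask) | replacements;
    }

    fn main() {
        // Pretend the host reports 0xf6 in some CPUID output register.
        let mut reg: u32 = 0x0000_00f6;
        // The profile keeps the low nibble and forces the high nibble to 0x5.
        adjust(&mut reg, 0x0000_000f, 0x0000_0050);
        assert_eq!(reg, 0x0000_0056);
    }
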
Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- arch/src/x86_64/cpu_profile.rs | 2 +- arch/src/x86_64/mod.rs | 356 ++++++++++++++++++++------------- 2 files changed, 221 insertions(+), 137 deletions(-) diff --git a/arch/src/x86_64/cpu_profile.rs b/arch/src/x86_64/cpu_profile.rs index bf5d90f061..47237d720f 100644 --- a/arch/src/x86_64/cpu_profile.rs +++ b/arch/src/x86_64/cpu_profile.rs @@ -243,4 +243,4 @@ impl CpuidOutputRegisterAdjustments { #[derive(Debug, Error)] #[error("Required CPUID entries not found")] -pub(in crate::x86_64) struct MissingCpuidEntriesError; +pub struct MissingCpuidEntriesError; diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index 7ee535472d..7d7327d77b 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -29,6 +29,7 @@ use vm_memory::{ GuestMemoryRegion, }; +use crate::x86_64::cpu_profile::CpuidOutputRegisterAdjustments; use crate::{CpuProfile, GuestMemoryMmap, InitramfsConfig, RegionType}; mod smbios; use std::arch::x86_64; @@ -128,6 +129,26 @@ pub enum Error { #[error("Error getting supported CPUID through the hypervisor API")] CpuidGetSupported(#[source] HypervisorError), + #[error( + "The selected CPU profile cannot be utilized because the host's CPUID entries are not compatible with the profile" + )] + CpuProfileCpuidIncompatibility, + /// Error because TDX cannot be enabled when a custom (non host) CPU profile has been selected + #[error("TDX cannot be enabled when a custom CPU profile has been selected")] + CpuProfileTdxIncompatibility, + #[error( + "The selected CPU profile cannot be utilized because a necessary CPUID entry was not found" + )] + /// Error when trying to apply a CPU profile because a necessary CPUID entry was not found + MissingExpectedCpuidEntry(#[source] cpu_profile::MissingCpuidEntriesError), + /// Error when trying to apply a CPU profile because the host has a CPU from a different vendor + #[error( + "The selected CPU profile cannot be utilized because the host has a CPU from a different vendor" + )] + CpuProfileVendorIncompatibility { + cpu_vendor_profile: CpuVendor, + cpu_vendor_host: CpuVendor, + }, /// Error populating CPUID with KVM HyperV emulation details #[error("Error populating CPUID with KVM HyperV emulation details")] CpuidKvmHyperV(#[source] vmm_sys_util::fam::Error), @@ -283,7 +304,7 @@ impl CpuidPatch { } } - pub fn patch_cpuid(cpuid: &mut [CpuIdEntry], patches: Vec) { + pub fn patch_cpuid(cpuid: &mut [CpuIdEntry], patches: &[CpuidPatch]) { for entry in cpuid { for patch in patches.iter() { if entry.function == patch.function && entry.index == patch.index { @@ -550,10 +571,15 @@ impl CpuidFeatureEntry { } } +/// This function generates the CPUID entries to be set for all CPUs. 
+/// +/// If the `config` has a CPU profile set (other than host) then the profile +/// will be applied pub fn generate_common_cpuid( hypervisor: &Arc, config: &CpuidConfig, ) -> super::Result> { + info!("calling generate_common_cpuid"); // SAFETY: cpuid called with valid leaves if unsafe { x86_64::__cpuid(1) }.ecx & (1 << HYPERVISOR_ECX_BIT) == 1 << HYPERVISOR_ECX_BIT { // SAFETY: cpuid called with valid leaves @@ -615,167 +641,225 @@ pub fn generate_common_cpuid( }); } - // Supported CPUID - let mut cpuid = hypervisor + // Supported CPUID according to the host and hypervisor + let mut host_cpuid = hypervisor .get_supported_cpuid() .map_err(Error::CpuidGetSupported)?; - CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches); - - #[cfg(feature = "tdx")] - let tdx_capabilities = if config.tdx { - let caps = hypervisor - .tdx_capabilities() - .map_err(Error::TdxCapabilities)?; - info!("TDX capabilities {:#?}", caps); - Some(caps) - } else { - None + let use_custom_profile = config.profile != CpuProfile::Host; + // Obtain cpuid entries that are adjusted to the specified CPU profile and the cpuid entries of the compatibility target + // TODO: Try to write this in a clearer way + let (host_adjusted_to_profile, profile_cpu_vendor) = { + config + .profile + .data(config.amx) + .map(|profile_data| { + ( + CpuidOutputRegisterAdjustments::adjust_cpuid_entries( + host_cpuid.clone(), + &profile_data.adjustments, + ) + .map(Some), + Some(profile_data.cpu_vendor), + ) + }) + .unwrap_or((Ok(None), None)) }; + let mut host_adjusted_to_profile = + host_adjusted_to_profile.map_err(Error::MissingExpectedCpuidEntry)?; + + // There should be relatively few cases where live migration can succeed between hosts from different + // CPU vendors and making our checks account for that possibility would complicate things substantially. + // We thus require that the host's cpu vendor matches the one used to generate the CPU profile. + if let Some(cpu_vendor_profile) = profile_cpu_vendor + && let cpu_vendor_host = hypervisor.get_cpu_vendor() + && cpu_vendor_profile != cpu_vendor_host + { + return Err(Error::CpuProfileVendorIncompatibility { + cpu_vendor_profile, + cpu_vendor_host, + } + .into()); + } + // We now make the modifications according to the config parameters to each of the cpuid entries + // declared above and then perform a compatibility check. 
+ for cpuid_optiion in [Some(&mut host_cpuid), host_adjusted_to_profile.as_mut()] { + let Some(cpuid) = cpuid_optiion else { + break; + }; + CpuidPatch::patch_cpuid(cpuid, &cpuid_patches); - // Update some existing CPUID - for entry in cpuid.as_mut_slice().iter_mut() { - match entry.function { - // Clear AMX related bits if the AMX feature is not enabled - 0x7 => { - if !config.amx && entry.index == 0 { - entry.edx &= !((1 << AMX_BF16) | (1 << AMX_TILE) | (1 << AMX_INT8)) - } + #[cfg(feature = "tdx")] + let tdx_capabilities = if config.tdx { + if use_custom_profile { + return Err(Error::CpuProfileTdxIncompatibility.into()); } - 0xd => - { - #[cfg(feature = "tdx")] - if let Some(caps) = &tdx_capabilities { - let xcr0_mask: u64 = 0x82ff; - let xss_mask: u64 = !xcr0_mask; - if entry.index == 0 { - entry.eax &= (caps.xfam_fixed0 as u32) & (xcr0_mask as u32); - entry.eax |= (caps.xfam_fixed1 as u32) & (xcr0_mask as u32); - entry.edx &= ((caps.xfam_fixed0 & xcr0_mask) >> 32) as u32; - entry.edx |= ((caps.xfam_fixed1 & xcr0_mask) >> 32) as u32; - } else if entry.index == 1 { - entry.ecx &= (caps.xfam_fixed0 as u32) & (xss_mask as u32); - entry.ecx |= (caps.xfam_fixed1 as u32) & (xss_mask as u32); - entry.edx &= ((caps.xfam_fixed0 & xss_mask) >> 32) as u32; - entry.edx |= ((caps.xfam_fixed1 & xss_mask) >> 32) as u32; + let caps = hypervisor + .tdx_capabilities() + .map_err(Error::TdxCapabilities)?; + info!("TDX capabilities {:#?}", caps); + Some(caps) + } else { + None + }; + + // Update some existing CPUID + for entry in cpuid.as_mut_slice().iter_mut() { + match entry.function { + // Clear AMX related bits if the AMX feature is not enabled + 0x7 => { + if !config.amx && entry.index == 0 { + entry.edx &= !((1 << AMX_BF16) | (1 << AMX_TILE) | (1 << AMX_INT8)) } } - } - // Copy host L1 cache details if not populated by KVM - 0x8000_0005 => { - if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 { - // SAFETY: cpuid called with valid leaves - if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0005 { + 0xd => + { + #[cfg(feature = "tdx")] + if let Some(caps) = &tdx_capabilities { + let xcr0_mask: u64 = 0x82ff; + let xss_mask: u64 = !xcr0_mask; + if entry.index == 0 { + entry.eax &= (caps.xfam_fixed0 as u32) & (xcr0_mask as u32); + entry.eax |= (caps.xfam_fixed1 as u32) & (xcr0_mask as u32); + entry.edx &= ((caps.xfam_fixed0 & xcr0_mask) >> 32) as u32; + entry.edx |= ((caps.xfam_fixed1 & xcr0_mask) >> 32) as u32; + } else if entry.index == 1 { + entry.ecx &= (caps.xfam_fixed0 as u32) & (xss_mask as u32); + entry.ecx |= (caps.xfam_fixed1 as u32) & (xss_mask as u32); + entry.edx &= ((caps.xfam_fixed0 & xss_mask) >> 32) as u32; + entry.edx |= ((caps.xfam_fixed1 & xss_mask) >> 32) as u32; + } + } + } + // Copy host L1 cache details if not populated by KVM + 0x8000_0005 => { + if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 { // SAFETY: cpuid called with valid leaves - let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0005) }; - entry.eax = leaf.eax; - entry.ebx = leaf.ebx; - entry.ecx = leaf.ecx; - entry.edx = leaf.edx; + if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0005 { + // SAFETY: cpuid called with valid leaves + let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0005) }; + entry.eax = leaf.eax; + entry.ebx = leaf.ebx; + entry.ecx = leaf.ecx; + entry.edx = leaf.edx; + } } } - } - // Copy host L2 cache details if not populated by KVM - 0x8000_0006 => { - if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && 
entry.edx == 0 { - // SAFETY: cpuid called with valid leaves - if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0006 { + // Copy host L2 cache details if not populated by KVM + 0x8000_0006 => { + if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 { // SAFETY: cpuid called with valid leaves - let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0006) }; - entry.eax = leaf.eax; - entry.ebx = leaf.ebx; - entry.ecx = leaf.ecx; - entry.edx = leaf.edx; + if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0006 { + // SAFETY: cpuid called with valid leaves + let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0006) }; + entry.eax = leaf.eax; + entry.ebx = leaf.ebx; + entry.ecx = leaf.ecx; + entry.edx = leaf.edx; + } } } - } - // Set CPU physical bits - 0x8000_0008 => { - entry.eax = (entry.eax & 0xffff_ff00) | (config.phys_bits as u32 & 0xff); - } - 0x4000_0001 => { - // Enable KVM_FEATURE_MSI_EXT_DEST_ID. This allows the guest to target - // device interrupts to cpus with APIC IDs > 254 without interrupt remapping. - entry.eax |= 1 << KVM_FEATURE_MSI_EXT_DEST_ID; - - // These features are not supported by TDX - #[cfg(feature = "tdx")] - if config.tdx { - entry.eax &= !((1 << KVM_FEATURE_CLOCKSOURCE_BIT) - | (1 << KVM_FEATURE_CLOCKSOURCE2_BIT) - | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) - | (1 << KVM_FEATURE_ASYNC_PF_BIT) - | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT) - | (1 << KVM_FEATURE_STEAL_TIME_BIT)) + // Set CPU physical bits + 0x8000_0008 => { + entry.eax = (entry.eax & 0xffff_ff00) | (config.phys_bits as u32 & 0xff); + } + 0x4000_0001 => { + // Enable KVM_FEATURE_MSI_EXT_DEST_ID. This allows the guest to target + // device interrupts to cpus with APIC IDs > 254 without interrupt remapping. + entry.eax |= 1 << KVM_FEATURE_MSI_EXT_DEST_ID; + + // These features are not supported by TDX + #[cfg(feature = "tdx")] + if config.tdx { + entry.eax &= !((1 << KVM_FEATURE_CLOCKSOURCE_BIT) + | (1 << KVM_FEATURE_CLOCKSOURCE2_BIT) + | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) + | (1 << KVM_FEATURE_ASYNC_PF_BIT) + | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT) + | (1 << KVM_FEATURE_STEAL_TIME_BIT)) + } } + _ => {} } - _ => {} } - } - // Copy CPU identification string - for i in 0x8000_0002..=0x8000_0004 { - cpuid.retain(|c| c.function != i); - // SAFETY: call cpuid with valid leaves - let leaf = unsafe { std::arch::x86_64::__cpuid(i) }; - cpuid.push(CpuIdEntry { - function: i, - eax: leaf.eax, - ebx: leaf.ebx, - ecx: leaf.ecx, - edx: leaf.edx, - ..Default::default() - }); - } + // Copy CPU identification string + /* + TODO: Do we want to do this in the case of CPU profiles? 
+ */ + for i in 0x8000_0002..=0x8000_0004 { + cpuid.retain(|c| c.function != i); + // SAFETY: call cpuid with valid leaves + let leaf = unsafe { std::arch::x86_64::__cpuid(i) }; + cpuid.push(CpuIdEntry { + function: i, + eax: leaf.eax, + ebx: leaf.ebx, + ecx: leaf.ecx, + edx: leaf.edx, + ..Default::default() + }); + } - if config.kvm_hyperv { - // Remove conflicting entries - cpuid.retain(|c| c.function != 0x4000_0000); - cpuid.retain(|c| c.function != 0x4000_0001); - // See "Hypervisor Top Level Functional Specification" for details - // Compliance with "Hv#1" requires leaves up to 0x4000_000a - cpuid.push(CpuIdEntry { - function: 0x40000000, - eax: 0x4000000a, // Maximum cpuid leaf - ebx: 0x756e694c, // "Linu" - ecx: 0x564b2078, // "x KV" - edx: 0x7648204d, // "M Hv" - ..Default::default() - }); - cpuid.push(CpuIdEntry { - function: 0x40000001, - eax: 0x31237648, // "Hv#1" - ..Default::default() - }); - cpuid.push(CpuIdEntry { - function: 0x40000002, - eax: 0x3839, // "Build number" - ebx: 0xa0000, // "Version" - ..Default::default() - }); - cpuid.push(CpuIdEntry { - function: 0x4000_0003, - eax: (1 << 1) // AccessPartitionReferenceCounter + if config.kvm_hyperv { + // Remove conflicting entries + cpuid.retain(|c| c.function != 0x4000_0000); + cpuid.retain(|c| c.function != 0x4000_0001); + // See "Hypervisor Top Level Functional Specification" for details + // Compliance with "Hv#1" requires leaves up to 0x4000_000a + cpuid.push(CpuIdEntry { + function: 0x40000000, + eax: 0x4000000a, // Maximum cpuid leaf + ebx: 0x756e694c, // "Linu" + ecx: 0x564b2078, // "x KV" + edx: 0x7648204d, // "M Hv" + ..Default::default() + }); + cpuid.push(CpuIdEntry { + function: 0x40000001, + eax: 0x31237648, // "Hv#1" + ..Default::default() + }); + cpuid.push(CpuIdEntry { + function: 0x40000002, + eax: 0x3839, // "Build number" + ebx: 0xa0000, // "Version" + ..Default::default() + }); + cpuid.push(CpuIdEntry { + function: 0x4000_0003, + eax: (1 << 1) // AccessPartitionReferenceCounter | (1 << 2) // AccessSynicRegs | (1 << 3) // AccessSyntheticTimerRegs | (1 << 9), // AccessPartitionReferenceTsc - edx: 1 << 3, // CPU dynamic partitioning - ..Default::default() - }); - cpuid.push(CpuIdEntry { - function: 0x4000_0004, - eax: 1 << 5, // Recommend relaxed timing - ..Default::default() - }); - for i in 0x4000_0005..=0x4000_000a { + edx: 1 << 3, // CPU dynamic partitioning + ..Default::default() + }); cpuid.push(CpuIdEntry { - function: i, + function: 0x4000_0004, + eax: 1 << 5, // Recommend relaxed timing ..Default::default() }); + for i in 0x4000_0005..=0x4000_000a { + cpuid.push(CpuIdEntry { + function: i, + ..Default::default() + }); + } } } - - Ok(cpuid) + if !use_custom_profile { + Ok(host_cpuid) + } else { + // Final compatibility checks to ensure that the CPUID values we return are compatible both with the CPU profile and the host we are currently running on. + let host_adjusted_to_profile = host_adjusted_to_profile.expect("The profile adjusted cpuid entries should exist as we checked that we have a custom CPU profile"); + + // Check that the host's cpuid is indeed compatible with the adjusted profile. This is not by construction. 
+ info!("checking compatibility between host adjusted to profile and the host itself"); + CpuidFeatureEntry::check_cpuid_compatibility(&host_adjusted_to_profile, &host_cpuid) + .map_err(|_| Error::CpuProfileCpuidIncompatibility)?; + Ok(host_adjusted_to_profile) + } } pub fn configure_vcpu( @@ -1419,7 +1503,7 @@ fn update_cpuid_topology( edx_bit: Some(28), }, ]; - CpuidPatch::patch_cpuid(cpuid, cpuid_patches); + CpuidPatch::patch_cpuid(cpuid, &cpuid_patches); CpuidPatch::set_cpuid_reg( cpuid, 0x8000_0008, From 22a23193ca6ae44c91a4a1b0c1f5ccea48b07356 Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Tue, 9 Dec 2025 17:21:01 +0100 Subject: [PATCH 290/294] arch: Include Skylake and Sapphire rapids CPU profiles We include CPU profiles corresponding to Intel Skylake and Sapphire rapids server that we generated using our WIP CPU profile generation tool. Signed-of-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- arch/Cargo.toml | 2 + arch/src/x86_64/cpu_profile.rs | 22 +- .../x86_64/cpu_profiles/sapphire-rapids.json | 3436 +++++++++++++++++ arch/src/x86_64/cpu_profiles/skylake.json | 3184 +++++++++++++++ 4 files changed, 6639 insertions(+), 5 deletions(-) create mode 100644 arch/src/x86_64/cpu_profiles/sapphire-rapids.json create mode 100644 arch/src/x86_64/cpu_profiles/skylake.json diff --git a/arch/Cargo.toml b/arch/Cargo.toml index 3e82367707..5cc8b0fd71 100644 --- a/arch/Cargo.toml +++ b/arch/Cargo.toml @@ -19,6 +19,8 @@ libc = { workspace = true } linux-loader = { workspace = true, features = ["bzimage", "elf", "pe"] } log = { workspace = true } serde = { workspace = true, features = ["derive", "rc"] } +# We currently use this for (de-)serializing CPU profile data +serde_json = { workspace = true } thiserror = { workspace = true } uuid = { workspace = true } vm-memory = { workspace = true, features = ["backend-bitmap", "backend-mmap"] } diff --git a/arch/src/x86_64/cpu_profile.rs b/arch/src/x86_64/cpu_profile.rs index 47237d720f..36c8a62e2d 100644 --- a/arch/src/x86_64/cpu_profile.rs +++ b/arch/src/x86_64/cpu_profile.rs @@ -18,7 +18,9 @@ use crate::x86_64::cpuid_definitions::{Parameters, deserialize_from_hex, seriali pub enum CpuProfile { #[default] Host, + #[cfg(feature = "kvm")] Skylake, + #[cfg(feature = "kvm")] SapphireRapids, } @@ -35,16 +37,26 @@ impl CpuProfile { pub(in crate::x86_64) fn data(&self, amx: bool) -> Option { let mut data: CpuProfileData = match self { Self::Host => None, - Self::Skylake => todo!(), - Self::SapphireRapids => todo!(), + Self::Skylake => Some( + serde_json::from_slice(include_bytes!("cpu_profiles/skylake.json")) + .inspect_err(|e| { + error!("BUG: could not deserialize CPU profile. Got error: {:?}", e) + }) + .expect("should be able to deserialize pre-generated data"), + ), + Self::SapphireRapids => Some( + serde_json::from_slice(include_bytes!("cpu_profiles/sapphire-rapids.json")) + .inspect_err(|e| { + error!("BUG: could not deserialize CPU profile. Got error: {:?}", e) + }) + .expect("should be able to deserialize pre-generated data"), + ), }?; if !amx { // In this case we will need to wipe out the AMX tile state components (if they are included in the profile) for adj in data.adjustments.iter_mut() { if adj.0.sub_leaf.start() != adj.0.sub_leaf.end() { - // The generated profiles produce as many sub-leaf entries as possible, and only use ranges for - // values not found. continue; } let sub_leaf = *adj.0.sub_leaf.start(); @@ -73,7 +85,7 @@ impl CpuProfile { } // This will need to be addressed before upstreaming. 
// We will probably need one profile per hypervisor. - unimplemented!() + unreachable!() } } diff --git a/arch/src/x86_64/cpu_profiles/sapphire-rapids.json b/arch/src/x86_64/cpu_profiles/sapphire-rapids.json new file mode 100644 index 0000000000..0ea90aa979 --- /dev/null +++ b/arch/src/x86_64/cpu_profiles/sapphire-rapids.json @@ -0,0 +1,3436 @@ +{ + "hypervisor": "Kvm", + "cpu_vendor": "Intel", + "adjustments": [ + [ + { + "leaf": "0x00000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000020", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x756e6547", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x6c65746e", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x49656e69", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x000806f8", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ff00" + } + ], + [ + { + "leaf": "0x00000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x76fa3223", + "mask": "0x80000000" + } + ], + [ + { + "leaf": "0x00000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x078bfbff", + "mask": "0x08000000" + } + ], + [ + { + "leaf": "0x00000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffc3ff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffc3ff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffc3ff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffc3ff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffc3ff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 5, + "end": 4294967295 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffc3ff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + 
"start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 5, + "end": 4294967295 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x7fffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x7fffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x7fffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x7fffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x7fffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 5, + "end": 4294967295 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x7fffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000007" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000007" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000007" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000007" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000007" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 5, + "end": 4294967295 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000007" + } + ], + [ + { + "leaf": "0x00000005", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000005", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000005", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": 
"0x00000000" + } + ], + [ + { + "leaf": "0x00000005", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000006", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000004", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000006", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000006", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000006", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000002", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0xf1bf07ab", + "mask": "0x00002040" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x1b415f6e", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0xa7c04010", + "mask": "0x18000400" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00001c30", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EDX" + }, + { + "replacements": "0x00000017", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000009", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000a", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000a", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000a", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000a", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000001f" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000001f" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, 
+ { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x000602e7", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x0000001f", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EAX" + }, + { + "replacements": "0x00000100", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 4 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EAX" + }, + { + "replacements": "0x00000040", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 6, + "end": 6 + }, + "register": "EAX" + }, + { + "replacements": "0x00000200", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 7, + "end": 7 + }, + "register": "EAX" + }, + { + "replacements": "0x00000400", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 8, + "end": 8 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 9, + "end": 9 + }, + "register": "EAX" + }, + { + "replacements": "0x00000008", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + 
"sub_leaf": { + "start": 10, + "end": 16 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 17, + "end": 17 + }, + "register": "EAX" + }, + { + "replacements": "0x00000040", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 18, + "end": 18 + }, + "register": "EAX" + }, + { + "replacements": "0x00002000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 19, + "end": 63 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EBX" + }, + { + "replacements": "0x00000240", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 4 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EBX" + }, + { + "replacements": "0x00000440", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 6, + "end": 6 + }, + "register": "EBX" + }, + { + "replacements": "0x00000480", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 7, + "end": 7 + }, + "register": "EBX" + }, + { + "replacements": "0x00000680", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 8, + "end": 8 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 9, + "end": 9 + }, + "register": "EBX" + }, + { + "replacements": "0x00000a80", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 10, + "end": 16 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 17, + "end": 17 + }, + "register": "EBX" + }, + { + "replacements": "0x00000ac0", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 18, + "end": 18 + }, + "register": "EBX" + }, + { + "replacements": "0x00000b00", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 19, + "end": 63 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 4 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 6, + "end": 6 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 7, + "end": 7 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 8, + "end": 8 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": 
"0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 9, + "end": 9 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 10, + "end": 16 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 17, + "end": 17 + }, + "register": "ECX" + }, + { + "replacements": "0x00000002", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 18, + "end": 18 + }, + "register": "ECX" + }, + { + "replacements": "0x00000006", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 19, + "end": 63 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 4 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 4 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 4 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 4 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EAX" + }, + { + "replacements": "0x00000040", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 6, + "end": 6 + }, + "register": "EAX" + }, + { + "replacements": "0x00000200", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 7, + "end": 7 + }, + "register": "EAX" + }, + { + "replacements": "0x00000400", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 8, + "end": 8 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 9, + "end": 9 + }, + "register": "EAX" + }, + { + "replacements": "0x00000008", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 10, + "end": 16 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 17, + "end": 17 + }, + "register": "EAX" + }, + { + "replacements": "0x00000040", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 18, + "end": 18 + }, + "register": "EAX" + }, + { + "replacements": "0x00002000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 19, + "end": 63 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EBX" + }, + { + "replacements": "0x00000440", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 6, + "end": 6 + }, + "register": "EBX" + }, + { + "replacements": "0x00000480", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 7, + "end": 7 + }, + "register": 
"EBX" + }, + { + "replacements": "0x00000680", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 8, + "end": 8 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 9, + "end": 9 + }, + "register": "EBX" + }, + { + "replacements": "0x00000a80", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 10, + "end": 16 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 17, + "end": 17 + }, + "register": "EBX" + }, + { + "replacements": "0x00000ac0", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 18, + "end": 18 + }, + "register": "EBX" + }, + { + "replacements": "0x00000b00", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 19, + "end": 63 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 6, + "end": 6 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 7, + "end": 7 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 8, + "end": 8 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 9, + "end": 9 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 10, + "end": 16 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 17, + "end": 17 + }, + "register": "ECX" + }, + { + "replacements": "0x00000002", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 18, + "end": 18 + }, + "register": "ECX" + }, + { + "replacements": "0x00000006", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 19, + "end": 63 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000f", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000f", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000f", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000f", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000f", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000f", + 
"sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000001f" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000001f" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000014", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000014", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000014", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000014", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000014", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + 
], + [ + { + "leaf": "0x00000015", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000015", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000015", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000016", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x00000016", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x00000016", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x00000017", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000018", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000018", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffff070f" + } + ], + [ + { + "leaf": "0x00000018", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffff070f" + } + ], + [ + { + "leaf": "0x00000018", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000018", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000018", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x03ffc1ff" + } + ], + [ + { + "leaf": "0x00000018", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x03ffc1ff" + } + ], + [ + { + "leaf": "0x0000001c", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001c", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001c", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001d", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000001", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x04002000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00080040", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "ECX" + }, + 
{ + "replacements": "0x00000010", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001e", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001e", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00004010", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001e", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001f", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000001f" + } + ], + [ + { + "leaf": "0x0000001f", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000001f" + } + ], + [ + { + "leaf": "0x0000001f", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000001f", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000001f", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000001f", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000001f", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x0000001f", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000020", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000020", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000021", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000021", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000021", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", 
+ "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000024", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000024", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x80000008", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x80000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x80000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x80000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0fff3fff" + } + ], + [ + { + "leaf": "0x80000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xf000ffff" + } + ], + [ + { + "leaf": "0x80000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000121", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x2c100800", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x65746e49", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x6153206c", + "mask": "0x00000000" + } 
+ ], + [ + { + "leaf": "0x80000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x69687070", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x72206572", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000003", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x64697061", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000003", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000073", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000003", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000003", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000006", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x80000007", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000100", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000008", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00ffffff" + } + ], + [ + { + "leaf": "0x80000008", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x40000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x40000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x40000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x40000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x40000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0103feff" + } + ], + [ + { + "leaf": "0x40000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000001" + } + ] + ] +} \ No newline at end of file diff --git a/arch/src/x86_64/cpu_profiles/skylake.json 
b/arch/src/x86_64/cpu_profiles/skylake.json new file mode 100644 index 0000000000..84ae4d99ee --- /dev/null +++ b/arch/src/x86_64/cpu_profiles/skylake.json @@ -0,0 +1,3184 @@ +{ + "hypervisor": "Kvm", + "cpu_vendor": "Intel", + "adjustments": [ + [ + { + "leaf": "0x00000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000016", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x756e6547", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x6c65746e", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x49656e69", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00050654", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ff00" + } + ], + [ + { + "leaf": "0x00000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x76fa3223", + "mask": "0x80000000" + } + ], + [ + { + "leaf": "0x00000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x078bfbff", + "mask": "0x08000000" + } + ], + [ + { + "leaf": "0x00000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffc3ff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffc3ff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffc3ff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffc3ff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffc3ff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 5, + "end": 4294967295 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffc3ff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + 
"start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 5, + "end": 4294967295 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x7fffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x7fffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x7fffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x7fffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x7fffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 5, + "end": 4294967295 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x7fffffff" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000007" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000007" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000007" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000007" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000007" + } + ], + [ + { + "leaf": "0x00000004", + "sub_leaf": { + "start": 5, + "end": 4294967295 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000007" + } + ], + [ + { + "leaf": "0x00000005", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000005", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000005", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000005", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": 
"0x00000000" + } + ], + [ + { + "leaf": "0x00000006", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000004", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000006", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000006", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000006", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0xd19f07ab", + "mask": "0x00002040" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x0000000c", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0xa4000000", + "mask": "0x18000400" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000007", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000009", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000a", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000a", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000a", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000a", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000001f" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000001f" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": 
"EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x0000000b", + "sub_leaf": { + "start": 1, + "end": 4294967295 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x000002e7", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x0000000f", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EAX" + }, + { + "replacements": "0x00000100", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EAX" + }, + { + "replacements": "0x00000040", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EAX" + }, + { + "replacements": "0x00000040", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EAX" + }, + { + "replacements": "0x00000040", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 6, + "end": 6 + }, + "register": "EAX" + }, + { + "replacements": "0x00000200", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 7, + "end": 7 + }, + "register": "EAX" + }, + { + "replacements": "0x00000400", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 8, + "end": 8 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 9, + "end": 9 + }, + "register": "EAX" + }, + { + "replacements": "0x00000008", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + 
"sub_leaf": { + "start": 10, + "end": 63 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EBX" + }, + { + "replacements": "0x00000240", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EBX" + }, + { + "replacements": "0x000003c0", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EBX" + }, + { + "replacements": "0x00000400", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EBX" + }, + { + "replacements": "0x00000440", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 6, + "end": 6 + }, + "register": "EBX" + }, + { + "replacements": "0x00000480", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 7, + "end": 7 + }, + "register": "EBX" + }, + { + "replacements": "0x00000680", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 8, + "end": 8 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 9, + "end": 9 + }, + "register": "EBX" + }, + { + "replacements": "0x00000a80", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 10, + "end": 63 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 6, + "end": 6 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 7, + "end": 7 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 8, + "end": 8 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 9, + "end": 9 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 10, + "end": 63 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" 
+ } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EAX" + }, + { + "replacements": "0x00000040", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 6, + "end": 6 + }, + "register": "EAX" + }, + { + "replacements": "0x00000200", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 7, + "end": 7 + }, + "register": "EAX" + }, + { + "replacements": "0x00000400", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 8, + "end": 8 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 9, + "end": 9 + }, + "register": "EAX" + }, + { + "replacements": "0x00000008", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 10, + "end": 63 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EBX" + }, + { + "replacements": "0x00000440", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 6, + "end": 6 + }, + "register": "EBX" + }, + { + "replacements": "0x00000480", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 7, + "end": 7 + }, + "register": "EBX" + }, + { + "replacements": "0x00000680", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 8, + "end": 8 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 9, + "end": 9 + }, + "register": "EBX" + }, + { + "replacements": "0x00000a80", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 10, + "end": 63 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 6, + "end": 6 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 7, + "end": 7 + }, + "register": "ECX" + }, + { + 
"replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 8, + "end": 8 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 9, + "end": 9 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000d", + "sub_leaf": { + "start": 10, + "end": 63 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000f", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000f", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000f", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000f", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000f", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000000f", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000001f" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000001f" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 3, + "end": 
3 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000010", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000014", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000014", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000014", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000014", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000014", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000015", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000015", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000015", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000016", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x00000016", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x00000016", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x00000017", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000018", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000018", + "sub_leaf": { + "start": 0, + "end": 4294967295 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffff070f" + } + ], + [ + { + "leaf": "0x00000018", + "sub_leaf": { + "start": 0, + "end": 4294967295 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000018", + "sub_leaf": { + "start": 0, + "end": 4294967295 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x03ffc1ff" + } + ], + [ + { + "leaf": "0x0000001c", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { 
+ "leaf": "0x0000001c", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001c", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001d", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001d", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001e", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001e", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001e", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x0000001f", + "sub_leaf": { + "start": 0, + "end": 4294967295 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000001f" + } + ], + [ + { + "leaf": "0x0000001f", + "sub_leaf": { + "start": 0, + "end": 4294967295 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000001f", + "sub_leaf": { + "start": 0, + "end": 4294967295 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x0000ffff" + } + ], + [ + { + "leaf": "0x0000001f", + "sub_leaf": { + "start": 0, + "end": 4294967295 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x00000020", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000020", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000021", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000021", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000021", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { 
+ "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 1, + "end": 1 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 2, + "end": 2 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 3, + "end": 3 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 4, + "end": 4 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000023", + "sub_leaf": { + "start": 5, + "end": 5 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000024", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x00000024", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x80000008", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x80000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x80000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x80000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0fff3fff" + } + ], + [ + { + "leaf": "0x80000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xf000ffff" + } + ], + [ + { + "leaf": "0x80000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000121", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x2c100800", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000002", + "sub_leaf": { + "start": 0, + "end": 
0 + }, + "register": "EAX" + }, + { + "replacements": "0x65746e49", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x6b53206c", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x6b616c79", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000002", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000065", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000003", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000003", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000003", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000003", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000004", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000006", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x80000007", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000100", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x80000008", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x00ffffff" + } + ], + [ + { + "leaf": "0x80000008", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000000" + } + ], + [ + { + "leaf": "0x40000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x40000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EBX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x40000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "ECX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x40000000", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0xffffffff" + } + ], + [ + { + "leaf": "0x40000001", + "sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EAX" + }, + { + "replacements": "0x00000000", + "mask": "0x0103feff" + } + ], + [ + { + "leaf": "0x40000001", + 
"sub_leaf": { + "start": 0, + "end": 0 + }, + "register": "EDX" + }, + { + "replacements": "0x00000000", + "mask": "0x00000001" + } + ] + ] +} \ No newline at end of file From 0b760f28172db346e844e52cedeeef59f6e9a3de Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Wed, 10 Dec 2025 11:00:16 +0100 Subject: [PATCH 291/294] arch: CPUID definitions data structures We introduce data structures to describe values within the registers modified by the CPUID instruction. These data structures will later be used by the upcoming CPU profile generation tool. Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- arch/src/x86_64/cpuid_definitions/mod.rs | 71 ++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/arch/src/x86_64/cpuid_definitions/mod.rs b/arch/src/x86_64/cpuid_definitions/mod.rs index a5b39dbe03..bff3cc6a4b 100644 --- a/arch/src/x86_64/cpuid_definitions/mod.rs +++ b/arch/src/x86_64/cpuid_definitions/mod.rs @@ -44,6 +44,77 @@ pub struct Parameters { pub register: CpuidReg, } +/// Describes a policy for how the corresponding CPUID data should be considered when building +/// a CPU profile. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum ProfilePolicy { + /// Store the corresponding data when building the CPU profile. + /// + /// When the CPU profile gets utilized the corresponding data will be set into the modified + /// CPUID instruction(s). + Inherit, + /// Ignore the corresponding data when building the CPU profile. + /// + /// When the CPU profile gets utilized the corresponding data will then instead get + /// extracted from the host. + /// + /// This variant is typically set for data that has no effect on migration compatibility, + /// but there may be some exceptions such as data which is necessary to run the VM at all, + /// but must coincide with whatever is on the host. + Passthrough, + /// Set the following hardcoded value in the CPU profile. + /// + /// This variant is typically used for features/values that don't work well with live migration (even when using the exact same physical CPU model). + Static(u32), +} + +/// A description of a range of bits in a register populated by the CPUID instruction with specific parameters. +#[derive(Clone, Copy, Debug)] +pub struct ValueDefinition { + /// A short name for the value obtainable through CPUID + pub short: &'static str, + /// A description of the value obtainable through CPUID + pub description: &'static str, + /// The range of bits in the output register corresponding to this feature or value. + /// + /// This is not a `RangeInclusive` because that type does unfortunately not implement `Copy`. + pub bits_range: (u8, u8), + /// The policy corresponding to this value when building CPU profiles. + pub policy: ProfilePolicy, +} + +/// Describes values within a register populated by the CPUID instruction with specific parameters. +/// +/// NOTE: The only way to interact with this value (beyond this crate) is via the const [`Self::as_slice()`](Self::as_slice) method. +pub struct ValueDefinitions(&'static [ValueDefinition]); +impl ValueDefinitions { + /// Constructor permitting at most 32 entries. + const fn new(cpuid_descriptions: &'static [ValueDefinition]) -> Self { + // Note that this function is only called within this module, at compile time, hence it is fine to have some + // additional sanity checks such as the following assert. + assert!(cpuid_descriptions.len() <= 32); + Self(cpuid_descriptions) + } + /// Converts this into a slice representation. 
This is the only way to read values of this type. + pub const fn as_slice(&self) -> &'static [ValueDefinition] { + self.0 + } +} + +/// Describes multiple CPUID outputs. +/// +/// Each wrapped [`ValueDefinitions`] corresponds to the given [`Parameters`] in the same tuple. +/// +pub struct CpuidDefinitions( + [(Parameters, ValueDefinitions); NUM_PARAMETERS], +); + +impl CpuidDefinitions { + pub const fn as_slice(&self) -> &[(Parameters, ValueDefinitions); NUM_PARAMETERS] { + &self.0 + } +} + #[cfg(test)] mod tests { use proptest::prelude::*; From c0eae6fad18f20b2560bb3334e5fb3bc9d2cb3b8 Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Wed, 10 Dec 2025 10:40:49 +0100 Subject: [PATCH 292/294] arch: Intel CPUID definitions We introduce CPUID definitions for Intel CPUs that will be utilized by the upcoming CPU Profile generation tool. Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- arch/src/x86_64/cpuid_definitions/intel.rs | 4814 ++++++++++++++++++++ arch/src/x86_64/cpuid_definitions/mod.rs | 2 + 2 files changed, 4816 insertions(+) create mode 100644 arch/src/x86_64/cpuid_definitions/intel.rs diff --git a/arch/src/x86_64/cpuid_definitions/intel.rs b/arch/src/x86_64/cpuid_definitions/intel.rs new file mode 100644 index 0000000000..cf43b63a8e --- /dev/null +++ b/arch/src/x86_64/cpuid_definitions/intel.rs @@ -0,0 +1,4814 @@ +//! This module contains CPUID definitions for Intel CPUs. +use std::ops::RangeInclusive; + +use super::{ + CpuidDefinitions, CpuidReg, Parameters, ProfilePolicy, ValueDefinition, ValueDefinitions, +}; + +/// Contains CPUID definitions described in "Intel Architecture Instruction Set Extensions and Future Features" +/// +/// ## Missing leaves +/// +/// The following known CPUID leaves are left out of this table: +/// - 0x3 (Only relevant for Intel Pentium III), +/// - 0x12 (Only relevant for SGX which is deprecated), +/// - 0x19 (Key locker leaf. These features are not in scope for CPU profiles for the time being) +/// - 0x1a (Native Model ID Enumeration leaf), +/// - 0x1b (PCONFIG Information Sub-leaf. This is not in scope for CPU profiles for the time being), +/// - 0x27 (L3 Cache Intel RDT Monitoring Capability Asymmetric Enumeration), +/// - 0x28 (Intel Resource Director Technology Allocation Asymmetric Enumeration), +/// - 0x21 (Only relevant for Intel TDX which is not in scope fore CPU profiles for the time being), +/// - 0x40000000 - 0x4FFFFFFF (Reserved for hypervisors), +/// +/// ### How we produced this table +/// +/// We first ran the [`cpuidgen` tool](https://gitlab.com/x86-cpuid.org/x86-cpuid-db), whose +/// output is licensed under the SPDX Creative Commons Zero 1.0 Universal License. We then wrote a +/// throw-away Rust script to modify the output into something more similar to Rust code. Following +/// this we used macros and other functionality in the [Helix editor](https://helix-editor.com/) to +/// get actual Rust code. +/// +/// We then read through the CPUID section (1.4) of the Intel Architecture Instruction Set +/// Extensions and Future Features manual and manually inserted several leaf definitions that +/// we noticed were missing from the table we had produced. During this process we also changed +/// a few of the short names and descriptions to be more inline with what is written in the +/// aforementioned Intel manual. Finally we decided on a [`ProfilePolicy`] to be set for every +/// single [`ValueDefinition`] and manually appended those. 
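// Editor's note (not part of the patch): a minimal sketch of how a
// `ValueDefinition`'s `bits_range` and `ProfilePolicy` might be consumed when a
// profile is generated from a host CPUID register value. The helper name
// `profile_bits_for` and the shift/mask handling of `Static` values are
// illustrative assumptions, not the actual implementation of the upcoming
// CPU profile generation tool.
fn profile_bits_for(host_reg: u32, def: &ValueDefinition) -> Option<u32> {
    let (lo, hi) = def.bits_range;
    // Build a mask covering the inclusive bit range described by the definition.
    let width = hi - lo + 1;
    let field_mask = if width >= 32 {
        u32::MAX
    } else {
        ((1u32 << width) - 1) << lo
    };
    match def.policy {
        // Inherit: record the host's bits so the profile can replay them later.
        ProfilePolicy::Inherit => Some(host_reg & field_mask),
        // Passthrough: store nothing; the value is re-read from the host when
        // the profile is applied.
        ProfilePolicy::Passthrough => None,
        // Static: force a fixed field value, assumed here to be shifted into
        // the field's bit position.
        ProfilePolicy::Static(value) => Some((value << lo) & field_mask),
    }
}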
+pub static INTEL_CPUID_DEFINITIONS: CpuidDefinitions<154> = const { + CpuidDefinitions([ + // ========================================================================================= + // Basic CPUID Information + // ========================================================================================= + ( + Parameters { + leaf: 0x0, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "max_std_leaf", + description: "Maximum Input value for Basic CPUID Information", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x0, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_vendorid_0", + description: "CPU vendor ID string bytes 0 - 3", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x0, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_vendorid_2", + description: "CPU vendor ID string bytes 8 - 11", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x0, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_vendorid_1", + description: "CPU vendor ID string bytes 4 - 7", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + // TODO: Do we really want to inherit these values from the corresponding CPU, or should we zero it out or set something else here? + ( + Parameters { + leaf: 0x1, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "stepping", + description: "Stepping ID", + bits_range: (0, 3), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "base_model", + description: "Base CPU model ID", + bits_range: (4, 7), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "base_family_id", + description: "Base CPU family ID", + bits_range: (8, 11), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "cpu_type", + description: "CPU type", + bits_range: (12, 13), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "ext_model", + description: "Extended CPU model ID", + bits_range: (16, 19), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "ext_family", + description: "Extended CPU family ID", + bits_range: (20, 27), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + ( + Parameters { + leaf: 0x1, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "brand_id", + description: "Brand index", + bits_range: (0, 7), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "clflush_size", + description: "CLFLUSH instruction cache line size", + bits_range: (8, 15), + policy: ProfilePolicy::Passthrough, + }, + // This is set by cloud hypervisor + ValueDefinition { + short: "n_logical_cpu", + description: "Logical CPU count", + bits_range: (16, 23), + policy: ProfilePolicy::Static(0), + }, + // This is set by cloud hypervisor + ValueDefinition { + short: "local_apic_id", + description: "Initial local APIC physical ID", + bits_range: (24, 31), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x1, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + 
ValueDefinitions::new(&[ + ValueDefinition { + short: "sse3", + description: "Streaming SIMD Extensions 3 (SSE3)", + bits_range: (0, 0), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "pclmulqdq", + description: "PCLMULQDQ instruction support", + bits_range: (1, 1), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "dtes64", + description: "64-bit DS save area", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "monitor", + description: "MONITOR/MWAIT support", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "ds_cpl", + description: "CPL Qualified Debug Store", + bits_range: (4, 4), + policy: ProfilePolicy::Static(0), + }, + // TODO: Ideally configurable by the user (host must have this otherwise CHV will not run) + ValueDefinition { + short: "vmx", + description: "Virtual Machine Extensions", + bits_range: (5, 5), + policy: ProfilePolicy::Static(1), + }, + ValueDefinition { + short: "smx", + description: "Safer Mode Extensions", + bits_range: (6, 6), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "est", + description: "Enhanced Intel SpeedStep", + bits_range: (7, 7), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "tm2", + description: "Thermal Monitor 2", + bits_range: (8, 8), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "ssse3", + description: "Supplemental SSE3", + bits_range: (9, 9), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "cnxt_id", + description: "L1 Context ID", + bits_range: (10, 10), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "sdbg", + description: "Silicon Debug", + bits_range: (11, 11), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "fma", + description: "FMA extensions using YMM state", + bits_range: (12, 12), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "cx16", + description: "CMPXCHG16B instruction support", + bits_range: (13, 13), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "xtpr", + description: "xTPR Update Control", + bits_range: (14, 14), + policy: ProfilePolicy::Static(0), + }, + // MSR related + ValueDefinition { + short: "pdcm", + description: "Perfmon and Debug Capability", + bits_range: (15, 15), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "pcid", + description: "Process-context identifiers", + bits_range: (17, 17), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "dca", + description: "Direct Cache Access", + bits_range: (18, 18), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "sse4_1", + description: "SSE4.1", + bits_range: (19, 19), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "sse4_2", + description: "SSE4.2", + bits_range: (20, 20), + policy: ProfilePolicy::Inherit, + }, + // Set by Cloud hypervisor + ValueDefinition { + short: "x2apic", + description: "X2APIC support", + bits_range: (21, 21), + policy: ProfilePolicy::Static(1), + }, + ValueDefinition { + short: "movbe", + description: "MOVBE instruction support", + bits_range: (22, 22), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "popcnt", + description: "POPCNT instruction support", + bits_range: (23, 23), + policy: ProfilePolicy::Inherit, + }, + // Set by Cloud hypervisor + ValueDefinition { + short: "tsc_deadline_timer", + description: "APIC timer 
one-shot operation", + bits_range: (24, 24), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "aes", + description: "AES instructions", + bits_range: (25, 25), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xsave", + description: "XSAVE (and related instructions) support", + bits_range: (26, 26), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "osxsave", + description: "XSAVE (and related instructions) are enabled by OS", + bits_range: (27, 27), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx", + description: "AVX instructions support", + bits_range: (28, 28), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "f16c", + description: "Half-precision floating-point conversion support", + bits_range: (29, 29), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "rdrand", + description: "RDRAND instruction support", + bits_range: (30, 30), + policy: ProfilePolicy::Inherit, + }, + // TODO: If set by CHV set to 0 and write comment + ValueDefinition { + short: "guest_status", + description: "System is running as guest; (para-)virtualized system", + bits_range: (31, 31), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0x1, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "fpu", + description: "Floating-Point Unit on-chip (x87)", + bits_range: (0, 0), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "vme", + description: "Virtual-8086 Mode Extensions", + bits_range: (1, 1), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "de", + description: "Debugging Extensions", + bits_range: (2, 2), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "pse", + description: "Page Size Extension", + bits_range: (3, 3), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "tsc", + description: "Time Stamp Counter", + bits_range: (4, 4), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "msr", + description: "Model-Specific Registers (RDMSR and WRMSR support)", + bits_range: (5, 5), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "pae", + description: "Physical Address Extensions", + bits_range: (6, 6), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "mce", + description: "Machine Check Exception", + bits_range: (7, 7), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "cx8", + description: "CMPXCHG8B instruction", + bits_range: (8, 8), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "apic", + description: "APIC on-chip", + bits_range: (9, 9), + policy: ProfilePolicy::Static(1), + }, + // MSR related (maybe not necessary to look into which ones) + ValueDefinition { + short: "sep", + description: "SYSENTER, SYSEXIT, and associated MSRs", + bits_range: (11, 11), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "mtrr", + description: "Memory Type Range Registers", + bits_range: (12, 12), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "pge", + description: "Page Global Extensions", + bits_range: (13, 13), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "mca", + description: "Machine Check Architecture", + bits_range: (14, 14), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "cmov", + description: "Conditional Move Instruction", + bits_range: (15, 15), 
+ policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "pat", + description: "Page Attribute Table", + bits_range: (16, 16), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "pse36", + description: "Page Size Extension (36-bit)", + bits_range: (17, 17), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "psn", + description: "Processor Serial Number", + bits_range: (18, 18), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "clfsh", + description: "CLFLUSH instruction", + bits_range: (19, 19), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "ds", + description: "Debug Store", + bits_range: (21, 21), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "acpi", + description: "Thermal monitor and clock control", + bits_range: (22, 22), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "mmx", + description: "MMX instructions", + bits_range: (23, 23), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "fxsr", + description: "FXSAVE and FXRSTOR instructions", + bits_range: (24, 24), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "sse", + description: "SSE instructions", + bits_range: (25, 25), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "sse2", + description: "SSE2 instructions", + bits_range: (26, 26), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "ss", + description: "Self Snoop", + bits_range: (27, 27), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "htt", + description: "Hyper-threading", + bits_range: (28, 28), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "tm", + description: "Thermal Monitor", + bits_range: (29, 29), + policy: ProfilePolicy::Static(0), + }, + // TODO: Not really sure what the default should be for PBE. It seems like it is something that needs to be enabled via the IA32_MISC_ENABLE MSR hence perhaps this should be set via CPU features? 
+ // MSR related + ValueDefinition { + short: "pbe", + description: "Pending Break Enable", + bits_range: (31, 31), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // ========================================================================================= + // Cache and TLB Information + // ========================================================================================= + ( + Parameters { + leaf: 0x2, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "iteration_count", + description: "Number of times this leaf must be queried", + bits_range: (0, 7), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "desc1", + description: "Descriptor #1", + bits_range: (8, 15), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "desc2", + description: "Descriptor #2", + bits_range: (16, 23), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "desc3", + description: "Descriptor #3", + bits_range: (24, 30), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "eax_invalid", + description: "Descriptors 1-3 are invalid if set", + bits_range: (31, 31), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0x2, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "desc4", + description: "Descriptor #4", + bits_range: (0, 7), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "desc5", + description: "Descriptor #5", + bits_range: (8, 15), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "desc6", + description: "Descriptor #6", + bits_range: (16, 23), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "desc7", + description: "Descriptor #7", + bits_range: (24, 30), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "ebx_invalid", + description: "Descriptors 4-7 are invalid if set", + bits_range: (31, 31), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0x2, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "desc8", + description: "Descriptor #8", + bits_range: (0, 7), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "desc9", + description: "Descriptor #9", + bits_range: (8, 15), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "desc10", + description: "Descriptor #10", + bits_range: (16, 23), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "desc11", + description: "Descriptor #11", + bits_range: (24, 30), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "ecx_invalid", + description: "Descriptors 8-11 are invalid if set", + bits_range: (31, 31), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0x2, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "desc12", + description: "Descriptor #12", + bits_range: (0, 7), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "desc13", + description: "Descriptor #13", + bits_range: (8, 15), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "desc14", + description: "Descriptor #14", + bits_range: (16, 23), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: 
"desc15", + description: "Descriptor #15", + bits_range: (24, 30), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "edx_invalid", + description: "Descriptors 12-15 are invalid if set", + bits_range: (31, 31), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + // ========================================================================================= + // Deterministic Cache Parameters + // ========================================================================================= + ( + Parameters { + leaf: 0x4, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "cache_type", + description: "Cache type field", + bits_range: (0, 4), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "cache_level", + description: "Cache level (1-based)", + bits_range: (5, 7), + policy: ProfilePolicy::Passthrough, + }, + // TODO: Could there be a problem migrating from a CPU with self-initializing cache to one without? + ValueDefinition { + short: "cache_self_init", + description: "Self-initializing cache level", + bits_range: (8, 8), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "fully_associative", + description: "Fully-associative cache", + bits_range: (9, 9), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "num_threads_sharing", + description: "Number logical CPUs sharing this cache", + bits_range: (14, 25), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "num_cores_on_die", + description: "Number of cores in the physical package", + bits_range: (26, 31), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0x4, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "cache_linesize", + description: "System coherency line size (0-based)", + bits_range: (0, 11), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "cache_npartitions", + description: "Physical line partitions (0-based)", + bits_range: (12, 21), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "cache_nways", + description: "Ways of associativity (0-based)", + bits_range: (22, 31), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0x4, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cache_nsets", + description: "Cache number of sets (0-based)", + bits_range: (0, 30), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x4, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "wbinvd_rll_no_guarantee", + description: "WBINVD/INVD not guaranteed for Remote Lower-Level caches", + bits_range: (0, 0), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "ll_inclusive", + description: "Cache is inclusive of Lower-Level caches", + bits_range: (1, 1), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "complex_indexing", + description: "Not a direct-mapped cache (complex function)", + bits_range: (2, 2), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + // ========================================================================================= + // MONITOR/MWAIT + // 
========================================================================================= + ( + Parameters { + leaf: 0x5, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "min_mon_size", + description: "Smallest monitor-line size, in bytes", + bits_range: (0, 15), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0x5, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "max_mon_size", + description: "Largest monitor-line size, in bytes", + bits_range: (0, 15), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0x5, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "mwait_ext", + description: "Enumeration of MONITOR/MWAIT extensions is supported", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "mwait_irq_break", + description: "Interrupts as a break-event for MWAIT is supported", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x5, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "n_c0_substates", + description: "Number of C0 sub C-states supported using MWAIT", + bits_range: (0, 3), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "n_c1_substates", + description: "Number of C1 sub C-states supported using MWAIT", + bits_range: (4, 7), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "n_c2_substates", + description: "Number of C2 sub C-states supported using MWAIT", + bits_range: (8, 11), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "n_c3_substates", + description: "Number of C3 sub C-states supported using MWAIT", + bits_range: (12, 15), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "n_c4_substates", + description: "Number of C4 sub C-states supported using MWAIT", + bits_range: (16, 19), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "n_c5_substates", + description: "Number of C5 sub C-states supported using MWAIT", + bits_range: (20, 23), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "n_c6_substates", + description: "Number of C6 sub C-states supported using MWAIT", + bits_range: (24, 27), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "n_c7_substates", + description: "Number of C7 sub C-states supported using MWAIT", + bits_range: (28, 31), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // ========================================================================================= + // Thermal and Power Management + // ========================================================================================= + ( + Parameters { + leaf: 0x6, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "dtherm", + description: "Digital temperature sensor", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "turbo_boost", + description: "Intel Turbo Boost", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "arat", + description: "Always-Running APIC Timer (not affected by p-state)", + bits_range: (2, 2), + policy: ProfilePolicy::Inherit, + }, 
+ ValueDefinition { + short: "pln", + description: "Power Limit Notification (PLN) event", + bits_range: (4, 4), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "ecmd", + description: "Clock modulation duty cycle extension", + bits_range: (5, 5), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "pts", + description: "Package thermal management", + bits_range: (6, 6), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "hwp", + description: "HWP (Hardware P-states) base registers are supported", + bits_range: (7, 7), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "hwp_notify", + description: "HWP notification (IA32_HWP_INTERRUPT MSR)", + bits_range: (8, 8), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "hwp_act_window", + description: "HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported", + bits_range: (9, 9), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "hwp_epp", + description: "HWP Energy Performance Preference", + bits_range: (10, 10), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "hwp_pkg_req", + description: "HWP Package Level Request", + bits_range: (11, 11), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "hdc_base_regs", + description: "HDC base registers are supported", + bits_range: (13, 13), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "turbo_boost_3_0", + description: "Intel Turbo Boost Max 3.0", + bits_range: (14, 14), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "hwp_capabilities", + description: "HWP Highest Performance change", + bits_range: (15, 15), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "hwp_peci_override", + description: "HWP PECI override", + bits_range: (16, 16), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "hwp_flexible", + description: "Flexible HWP", + bits_range: (17, 17), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "hwp_fast", + description: "IA32_HWP_REQUEST MSR fast access mode", + bits_range: (18, 18), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "hfi", + description: "HW_FEEDBACK MSRs supported", + bits_range: (19, 19), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "hwp_ignore_idle", + description: "Ignoring idle logical CPU HWP req is supported", + bits_range: (20, 20), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "thread_director", + description: "Intel thread director support", + bits_range: (23, 23), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "therm_interrupt_bit25", + description: "IA32_THERM_INTERRUPT MSR bit 25 is supported", + bits_range: (24, 24), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x6, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "n_therm_thresholds", + description: "Digital thermometer thresholds", + bits_range: (0, 3), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0x6, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + // MSR related + ValueDefinition { + short: "aperfmperf", + description: "MPERF/APERF MSRs (effective frequency interface)", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + // MSR related + ValueDefinition 
{ + short: "epb", + description: "IA32_ENERGY_PERF_BIAS MSR support", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "thrd_director_nclasses", + description: "Number of classes, Intel thread director", + bits_range: (8, 15), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x6, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "perfcap_reporting", + description: "Performance capability reporting", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "encap_reporting", + description: "Energy efficiency capability reporting", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "feedback_sz", + description: "Feedback interface structure size, in 4K pages", + bits_range: (8, 11), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "this_lcpu_hwfdbk_idx", + description: "This logical CPU hardware feedback interface index", + bits_range: (16, 31), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // =================================================================================================================== + // Structured Extended Feature Flags Enumeration Main Leaf + // =================================================================================================================== + ( + Parameters { + leaf: 0x7, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "leaf7_n_subleaves", + description: "Number of leaf 0x7 subleaves", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x7, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "fsgsbase", + description: "FSBASE/GSBASE read/write support", + bits_range: (0, 0), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "tsc_adjust", + description: "IA32_TSC_ADJUST MSR supported", + bits_range: (1, 1), + policy: ProfilePolicy::Inherit, + }, + // SGX is deprecated so we disable it unconditionally for all CPU profiles + ValueDefinition { + short: "sgx", + description: "Intel SGX (Software Guard Extensions)", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "bmi1", + description: "Bit manipulation extensions group 1", + bits_range: (3, 3), + policy: ProfilePolicy::Inherit, + }, + // TSX related which is riddled with CVEs. Consider two profiles, or making it opt-in/out. QEMU always has a CPU model with and without TSX. + ValueDefinition { + short: "hle", + description: "Hardware Lock Elision", + bits_range: (4, 4), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "avx2", + description: "AVX2 instruction set", + bits_range: (5, 5), + policy: ProfilePolicy::Inherit, + }, + /*The KVM docs recommend always setting this (https://docs.kernel.org/virt/kvm/x86/errata.html#kvm-get-supported-cpuid-issues). + + Keep in mind however that in my limited understanding this isn't about enabling or disabling a feature, but it describes critical behaviour. + Hence I am wondering whether it should be a hard error if the host does not have this bit set, but the desired CPU profile does? 
+ + TODO: Check what KVM_GET_SUPPORTED_CPUID actually gives here (on the Skylake server) + */ + ValueDefinition { + short: "fdp_excptn_only", + description: "FPU Data Pointer updated only on x87 exceptions", + bits_range: (6, 6), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "smep", + description: "Supervisor Mode Execution Protection", + bits_range: (7, 7), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "bmi2", + description: "Bit manipulation extensions group 2", + bits_range: (8, 8), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "erms", + description: "Enhanced REP MOVSB/STOSB", + bits_range: (9, 9), + policy: ProfilePolicy::Inherit, + }, + /* + The instruction enabled by this seems rather powerful. Are we sure that doesn't have security implications? + I included this because it seems like QEMU does (to the best of my understanding). + */ + ValueDefinition { + short: "invpcid", + description: "INVPCID instruction (Invalidate Processor Context ID)", + bits_range: (10, 10), + policy: ProfilePolicy::Inherit, + }, + // This is TSX related. TSX is riddled with CVEs: Consider two profiles (one with it disabled) or an opt-in/out feature. + ValueDefinition { + short: "rtm", + description: "Intel restricted transactional memory", + bits_range: (11, 11), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "rdt_m", + description: "Supports Intel Resource Director Technology Monitoring Capability if 1", + bits_range: (12, 12), + policy: ProfilePolicy::Static(0), + }, + // The KVM docs recommend always setting this (https://docs.kernel.org/virt/kvm/x86/errata.html#kvm-get-supported-cpuid-issues). TODO: Is it OK to just set this to 1? + ValueDefinition { + short: "zero_fcs_fds", + description: "Deprecates FPU CS and FPU DS values if 1", + bits_range: (13, 13), + policy: ProfilePolicy::Passthrough, + }, + // This has been deprecated + ValueDefinition { + short: "mpx", + description: "Intel memory protection extensions", + bits_range: (14, 14), + policy: ProfilePolicy::Static(0), + }, + // This might be useful for certain high performance applications, but it also seems like a rather niche and advanced feature. QEMU does also not automatically enable this from what we can tell. + // TODO: Should we make this OPT-IN? + ValueDefinition { + short: "rdt_a", + description: "Intel RDT-A. Supports Intel Resource Director Technology Allocation Capability if 1", + bits_range: (15, 15), + policy: ProfilePolicy::Static(0), + }, + // TODO: Do the wider avx512 zmm registers work out of the box when the hardware supports it? 
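+                // NOTE: the AVX-512 feature bits below are only usable by a guest if the matching
+                // XSAVE state components (XCR0 bits 5-7: opmask, ZMM_Hi256, Hi16_ZMM) are also
+                // enumerated via leaf 0xd; the xcr0_avx512_* definitions later in this table
+                // inherit those bits as well, so the two stay consistent for Inherit profiles.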
+ ValueDefinition { + short: "avx512f", + description: "AVX-512 foundation instructions", + bits_range: (16, 16), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx512dq", + description: "AVX-512 double/quadword instructions", + bits_range: (17, 17), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "rdseed", + description: "RDSEED instruction", + bits_range: (18, 18), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "adx", + description: "ADCX/ADOX instructions", + bits_range: (19, 19), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "smap", + description: "Supervisor mode access prevention", + bits_range: (20, 20), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx512ifma", + description: "AVX-512 integer fused multiply add", + bits_range: (21, 21), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "clflushopt", + description: "CLFLUSHOPT instruction", + bits_range: (23, 23), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "clwb", + description: "CLWB instruction", + bits_range: (24, 24), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "intel_pt", + description: "Intel processor trace", + bits_range: (25, 25), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "avx512pf", + description: "AVX-512 prefetch instructions", + bits_range: (26, 26), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx512er", + description: "AVX-512 exponent/reciprocal instructions", + bits_range: (27, 27), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx512cd", + description: "AVX-512 conflict detection instructions", + bits_range: (28, 28), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "sha_ni", + description: "SHA/SHA256 instructions", + bits_range: (29, 29), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx512bw", + description: "AVX-512 byte/word instructions", + bits_range: (30, 30), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx512vl", + description: "AVX-512 VL (128/256 vector length) extensions", + bits_range: (31, 31), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + ( + Parameters { + leaf: 0x7, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "prefetchwt1", + description: "PREFETCHWT1 (Intel Xeon Phi only)", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "avx512vbmi", + description: "AVX-512 Vector byte manipulation instructions", + bits_range: (1, 1), + policy: ProfilePolicy::Inherit, + }, + // Also set by QEMU for CPU models from what we can tell + ValueDefinition { + short: "umip", + description: "User mode instruction protection", + bits_range: (2, 2), + policy: ProfilePolicy::Inherit, + }, + // Also set by QEMU for CPU models from what we can tell + ValueDefinition { + short: "pku", + description: "Protection keys for user-space", + bits_range: (3, 3), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "ospke", + description: "OS protection keys enable", + bits_range: (4, 4), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "waitpkg", + description: "WAITPKG instructions", + bits_range: (5, 5), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx512_vbmi2", + description: "AVX-512 vector byte manipulation instructions group 2", + 
bits_range: (6, 6), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "cet_ss", + description: "CET shadow stack features", + bits_range: (7, 7), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "gfni", + description: "Galois field new instructions", + bits_range: (8, 8), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "vaes", + description: "Vector AES instructions", + bits_range: (9, 9), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "vpclmulqdq", + description: "VPCLMULQDQ 256-bit instruction support", + bits_range: (10, 10), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx512_vnni", + description: "Vector neural network instructions", + bits_range: (11, 11), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx512_bitalg", + description: "AVX-512 bitwise algorithms", + bits_range: (12, 12), + policy: ProfilePolicy::Inherit, + }, + // Seems to be TDX related which is experimental in CHV. We disable this for CPU profiles for now, but could potentially add it as an opt-in feature eventually. + ValueDefinition { + short: "tme", + description: "Intel total memory encryption", + bits_range: (13, 13), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "avx512_vpopcntdq", + description: "AVX-512: POPCNT for vectors of DWORD/QWORD", + bits_range: (14, 14), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "la57", + description: "57-bit linear addresses (five-level paging)", + bits_range: (16, 16), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "mawau_val_lm", + description: "BNDLDX/BNDSTX MAWAU value in 64-bit mode", + bits_range: (17, 21), + policy: ProfilePolicy::Static(0), + }, + // MSR related + ValueDefinition { + short: "rdpid", + description: "RDPID instruction", + bits_range: (22, 22), + policy: ProfilePolicy::Inherit, + }, + // We leave key locker support out for CPU profiles for the time being. 
We may want this to be opt-in in the future though + ValueDefinition { + short: "key_locker", + description: "Intel key locker support", + bits_range: (23, 23), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "bus_lock_detect", + description: "OS bus-lock detection", + bits_range: (24, 24), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "cldemote", + description: "CLDEMOTE instruction", + bits_range: (25, 25), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "movdiri", + description: "MOVDIRI instruction", + bits_range: (27, 27), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "movdir64b", + description: "MOVDIR64B instruction", + bits_range: (28, 28), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "enqcmd", + description: "Enqueue stores supported (ENQCMD{,S})", + bits_range: (29, 29), + policy: ProfilePolicy::Static(0), + }, + // SGX support is deprecated so we disable it unconditionally for CPU profiles + ValueDefinition { + short: "sgx_lc", + description: "Intel SGX launch configuration", + bits_range: (30, 30), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "pks", + description: "Protection keys for supervisor-mode pages", + bits_range: (31, 31), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x7, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + // SGX is deprecated + ValueDefinition { + short: "sgx_keys", + description: "Intel SGX attestation services", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "avx512_4vnniw", + description: "AVX-512 neural network instructions (Intel Xeon Phi only?)", + bits_range: (2, 2), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx512_4fmaps", + description: "AVX-512 multiply accumulation single precision (Intel Xeon Phi only?)", + bits_range: (3, 3), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "fsrm", + description: "Fast short REP MOV", + bits_range: (4, 4), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "uintr", + description: "CPU supports user interrupts", + bits_range: (5, 5), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "avx512_vp2intersect", + description: "VP2INTERSECT{D,Q} instructions", + bits_range: (8, 8), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "srdbs_ctrl", + description: "SRBDS mitigation MSR available: If 1, enumerates support for the IA32_MCU_OPT_CTRL MSR and indicates that its bit 0 (RNGDS_MITG_DIS) is also supported.", + bits_range: (9, 9), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "md_clear", + description: "VERW MD_CLEAR microcode support", + bits_range: (10, 10), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "rtm_always_abort", + description: "XBEGIN (RTM transaction) always aborts", + bits_range: (11, 11), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "tsx_force_abort", + description: "MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported", + bits_range: (13, 13), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "serialize", + description: "SERIALIZE instruction", + bits_range: (14, 14), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "hybrid_cpu", + description: "The CPU is identified as a 'hybrid part'", + bits_range: (15, 15), + 
policy: ProfilePolicy::Inherit, + }, + // TODO: This is TSX related which is riddled with CVEs. We could consider an additional profile enabling TSX in the future, but we leave it out for now. + ValueDefinition { + short: "tsxldtrk", + description: "TSX suspend/resume load address tracking", + bits_range: (16, 16), + policy: ProfilePolicy::Static(0), + }, + // Might be relevant for confidential computing + ValueDefinition { + short: "pconfig", + description: "PCONFIG instruction", + bits_range: (18, 18), + policy: ProfilePolicy::Static(0), + }, + // MSR related + ValueDefinition { + short: "arch_lbr", + description: "Intel architectural LBRs", + bits_range: (19, 19), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "ibt", + description: "CET indirect branch tracking", + bits_range: (20, 20), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_bf16", + description: "AMX-BF16: tile bfloat16 support", + bits_range: (22, 22), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx512_fp16", + description: "AVX-512 FP16 instructions", + bits_range: (23, 23), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_tile", + description: "AMX-TILE: tile architecture support", + bits_range: (24, 24), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_int8", + description: "AMX-INT8: tile 8-bit integer support", + bits_range: (25, 25), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "spec_ctrl", + description: "Speculation Control (IBRS/IBPB: indirect branch restrictions)", + bits_range: (26, 26), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "intel_stibp", + description: "Single thread indirect branch predictors", + bits_range: (27, 27), + policy: ProfilePolicy::Passthrough, + }, + // MSR related + ValueDefinition { + short: "flush_l1d", + description: "FLUSH L1D cache: IA32_FLUSH_CMD MSR", + bits_range: (28, 28), + policy: ProfilePolicy::Passthrough, + }, + // MSR related + ValueDefinition { + short: "arch_capabilities", + description: "Intel IA32_ARCH_CAPABILITIES MSR", + bits_range: (29, 29), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "core_capabilities", + description: "IA32_CORE_CAPABILITIES MSR", + bits_range: (30, 30), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "spec_ctrl_ssbd", + description: "Speculative store bypass disable", + bits_range: (31, 31), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + // =================================================================================================================== + // Structured Extended Feature Flags Enumeration Sub-Leaf 1 + // =================================================================================================================== + ( + Parameters { + leaf: 0x7, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "sha512", + description: "SHA-512 extensions", + bits_range: (0, 0), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "sm3", + description: "SM3 instructions", + bits_range: (1, 1), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "sm4", + description: "SM4 instructions", + bits_range: (2, 2), + policy: ProfilePolicy::Inherit, + }, + // RAO-INT is deprecated and removed from most compilers as far as we are aware + ValueDefinition { + short: 
"RAO-INT", + description: "RAO-INT instructions", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "avx_vnni", + description: "AVX-VNNI instructions", + bits_range: (4, 4), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx512_bf16", + description: "AVX-512 bfloat16 instructions", + bits_range: (5, 5), + policy: ProfilePolicy::Inherit, + }, + /* + Not set in QEMU from what we can tell, but according seems to be fine to expose this to guests + if we understood https://www.phoronix.com/news/Intel-Linux-LASS-KVM correctly. It is also + our understanding that this feature can enable guests opting in to more security (possibly at the cost of some performance). + */ + ValueDefinition { + short: "lass", + description: "Linear address space separation", + bits_range: (6, 6), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "cmpccxadd", + description: "CMPccXADD instructions", + bits_range: (7, 7), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "arch_perfmon_ext", + description: "ArchPerfmonExt: leaf 0x23 is supported", + bits_range: (8, 8), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "fzrm", + description: "Fast zero-length REP MOVSB", + bits_range: (10, 10), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "fsrs", + description: "Fast short REP STOSB", + bits_range: (11, 11), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "fsrc", + description: "Fast Short REP CMPSB/SCASB", + bits_range: (12, 12), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "fred", + description: "FRED: Flexible return and event delivery transitions", + bits_range: (17, 17), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "lkgs", + description: "LKGS: Load 'kernel' (userspace) GS", + bits_range: (18, 18), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "wrmsrns", + description: "WRMSRNS instruction (WRMSR-non-serializing)", + bits_range: (19, 19), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "nmi_src", + description: "NMI-source reporting with FRED event data", + bits_range: (20, 20), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "amx_fp16", + description: "AMX-FP16: FP16 tile operations", + bits_range: (21, 21), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "hreset", + description: "History reset support", + bits_range: (22, 22), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "avx_ifma", + description: "Integer fused multiply add", + bits_range: (23, 23), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "lam", + description: "Linear address masking", + bits_range: (26, 26), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "rd_wr_msrlist", + description: "RDMSRLIST/WRMSRLIST instructions", + bits_range: (27, 27), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "invd_disable_post_bios_done", + description: "If 1, supports INVD execution prevention after BIOS Done", + bits_range: (30, 30), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "movrs", + description: "MOVRS", + bits_range: (31, 31), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + ( + Parameters { + leaf: 0x7, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "intel_ppin", + description: 
"Protected processor inventory number (PPIN{,_CTL} MSRs)", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + // MSR related + ValueDefinition { + short: "pbndkb", + description: "PBNDKB instruction supported and enumerates the existence of the IA32_TSE_CAPABILITY MSR", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // TODO: Missing entry for (0x7, 1, ECX) + // Make the whole register zero though + // + ( + Parameters { + leaf: 0x7, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "avx_vnni_int8", + description: "AVX-VNNI-INT8 instructions", + bits_range: (4, 4), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx_ne_convert", + description: "AVX-NE-CONVERT instructions", + bits_range: (5, 5), + policy: ProfilePolicy::Inherit, + }, + // NOTE: AMX currently requires opt-in, even for the host CPU profile. We still inherit this value for profiles as the value will be zeroed out if the user has not opted in for "amx" via CpuFeatures. + ValueDefinition { + short: "amx_complex", + description: "AMX-COMPLEX instructions (starting from Granite Rapids)", + bits_range: (8, 8), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx_vnni_int16", + description: "AVX-VNNI-INT16 instructions", + bits_range: (10, 10), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "utmr", + description: "If 1, supports user-timer events", + bits_range: (13, 13), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "prefetchit_0_1", + description: "PREFETCHIT0/1 instructions", + bits_range: (14, 14), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "user_msr", + description: "If 1, supports the URDMSR and UWRMSR instructions", + bits_range: (15, 15), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "uiret_uif", + description: "If 1, UIRET sets UIF to the value of bit 1 of the RFLAGS image loaded from the stack", + bits_range: (15, 15), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "cet_sss", + description: "CET supervisor shadow stacks safe to use", + bits_range: (18, 18), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx10", + description: "If 1, supports the Intel AVX10 instructions and indicates the presence of leaf 0x24", + bits_range: (19, 19), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "apx_f", + description: "If 1, the processor provides foundational support for Intel Advanced Performance Extensions", + bits_range: (21, 21), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "mwait", + description: "If 1, MWAIT is supported even if (0x1 ECX bit 3 (monitor) is enumerated as 0)", + bits_range: (23, 23), + policy: ProfilePolicy::Static(0), + }, + // MSR related + ValueDefinition { + short: "slsm", + description: "If 1, indicates bit 0 of the IA32_INTEGRITY_STATUS MSR is supported. 
Bit 0 of this MSR indicates whether static lockstep is active on this logical processor", + bits_range: (24, 24), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // =================================================================================================================== + // Structured Extended Feature Flags Enumeration Sub-Leaf 2 + // =================================================================================================================== + ( + Parameters { + leaf: 0x7, + sub_leaf: RangeInclusive::new(2, 2), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + // MSR related + ValueDefinition { + short: "intel_psfd", + description: "If 1, indicates bit 7 of the IA32_SPEC_CTRL_MSR is supported. Bit 7 of this MSR disables fast store forwarding predictor without disabling speculative store bypass", + bits_range: (0, 0), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "ipred_ctrl", + description: "MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S}", + bits_range: (1, 1), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "rrsba_ctrl", + description: "MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S}", + bits_range: (2, 2), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "ddp_ctrl", + description: "MSR bit IA32_SPEC_CTRL.DDPD_U", + bits_range: (3, 3), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "bhi_ctrl", + description: "MSR bit IA32_SPEC_CTRL.BHI_DIS_S", + bits_range: (4, 4), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "mcdt_no", + description: "MCDT mitigation not needed", + bits_range: (5, 5), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "uclock_disable", + description: "UC-lock disable is supported", + bits_range: (6, 6), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + // =================================================================================================================== + // Direct Cache Access Information + // =================================================================================================================== + ( + Parameters { + leaf: 0x9, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + // MSR related + ValueDefinition { + short: "dca_cap_msr_value", + description: "Value of bits [31:0] of IA32_PLATFORM_DCA_CAP MSR (address 1f8H)", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // =================================================================================================================== + // Architectural Performance Monitoring + // =================================================================================================================== + // We will just zero out everything to do with PMU for CPU profiles + ( + Parameters { + leaf: 0xa, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "pmu_version", + description: "Performance monitoring unit version ID", + bits_range: (0, 7), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "pmu_n_gcounters", + description: "Number of general PMU counters per logical CPU", + bits_range: (8, 15), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "pmu_gcounters_nbits", + description: "Bitwidth of PMU general counters", + bits_range: (16, 23), + policy: 
ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "pmu_cpuid_ebx_bits", + description: "Length of leaf 0xa EBX bit vector", + bits_range: (24, 31), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0xa, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "no_core_cycle_evt", + description: "Core cycle event not available", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "no_insn_retired_evt", + description: "Instruction retired event not available", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "no_refcycle_evt", + description: "Reference cycles event not available", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "no_llc_ref_evt", + description: "LLC-reference event not available", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "no_llc_miss_evt", + description: "LLC-misses event not available", + bits_range: (4, 4), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "no_br_insn_ret_evt", + description: "Branch instruction retired event not available", + bits_range: (5, 5), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "no_br_mispredict_evt", + description: "Branch mispredict retired event not available", + bits_range: (6, 6), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "no_td_slots_evt", + description: "Topdown slots event not available", + bits_range: (7, 7), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0xa, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "pmu_fcounters_bitmap", + description: "Fixed-function PMU counters support bitmap", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0xa, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "pmu_n_fcounters", + description: "Number of fixed PMU counters", + bits_range: (0, 4), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "pmu_fcounters_nbits", + description: "Bitwidth of PMU fixed counters", + bits_range: (5, 12), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "anythread_depr", + description: "AnyThread deprecation", + bits_range: (15, 15), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // =================================================================================================================== + // Extended Topology Enumeration + // =================================================================================================================== + + // Leaf 0xB must be set by CHV itself (and do all necessary checks) + ( + Parameters { + leaf: 0xb, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "x2apic_id_shift", + description: "Bit width of this level (previous levels inclusive)", + bits_range: (0, 4), + policy: ProfilePolicy::Passthrough, + }]), + ), + // Set by VMM/user provided config + ( + Parameters { + leaf: 0xb, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "domain_lcpus_count", + description: "Logical CPUs count 
across all instances of this domain", + bits_range: (0, 15), + policy: ProfilePolicy::Passthrough, + }]), + ), + // Set by VMM/user provided config + ( + Parameters { + leaf: 0xb, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "domain_nr", + description: "This domain level (subleaf ID)", + bits_range: (0, 7), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "domain_type", + description: "This domain type", + bits_range: (8, 15), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + // Set by VMM/user provided config + ( + Parameters { + leaf: 0xb, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "x2apic_id", + description: "x2APIC ID of current logical CPU", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + // =================================================================================================================== + // Processor Extended State Enumeration Main Leaf + // =================================================================================================================== + // TODO: Implement CPUID compatibility checks in CHV for this leaf + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "xcr0_x87", + description: "XCR0.X87 (bit 0) supported", + bits_range: (0, 0), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xcr0_sse", + description: "XCR0.SEE (bit 1) supported", + bits_range: (1, 1), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xcr0_avx", + description: "XCR0.AVX (bit 2) supported", + bits_range: (2, 2), + policy: ProfilePolicy::Inherit, + }, + // MPX is deprecated + ValueDefinition { + short: "xcr0_mpx_bndregs", + description: "XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 registers)", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + // MPX is deprecated + ValueDefinition { + short: "xcr0_mpx_bndcsr", + description: "XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers)", + bits_range: (4, 4), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "xcr0_avx512_opmask", + description: "XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 registers)", + bits_range: (5, 5), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xcr0_avx512_zmm_hi256", + description: "XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers)", + bits_range: (6, 6), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xcr0_avx512_hi16_zmm", + description: "XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers)", + bits_range: (7, 7), + policy: ProfilePolicy::Inherit, + }, + // MSR related + ValueDefinition { + short: "xcr0_ia32_xss", + description: "XCR0.IA32_XSS (bit 8) used for IA32_XSS", + bits_range: (8, 8), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xcr0_pkru", + description: "XCR0.PKRU (bit 9) supported (XSAVE PKRU registers)", + bits_range: (9, 9), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xcr0_ia32_xss_bits", + description: "XCR0.IA32_XSS (bit 10 - 16) used for IA32_XSS", + bits_range: (10, 16), + policy: ProfilePolicy::Inherit, + }, + // NOTE: AMX currently requires opt-in, even for the host CPU profile. 
We still inherit this value for profiles and modify this value at runtime if AMX is not enabled by the user. + ValueDefinition { + short: "xcr0_tileconfig", + description: "XCR0.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG)", + bits_range: (17, 17), + policy: ProfilePolicy::Inherit, + }, + // NOTE: AMX currently requires opt-in, even for the host CPU profile. We still inherit this value for profiles and modify this value at runtime if AMX is not ebabled by the user. + ValueDefinition { + short: "xcr0_tiledata", + description: "XCR0.TILEDATA (bit 18) supported (AMX can manage TILEDATA)", + bits_range: (18, 18), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + // This value can be changed by the OS and must thus be passthrough + ValueDefinitions::new(&[ValueDefinition { + short: "xsave_sz_xcr0_enabled", + description: "XSAVE/XRSTOR area byte size, for XCR0 enabled features", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + // This may be passthrough because we restrict each individual state component + ValueDefinitions::new(&[ValueDefinition { + short: "xsave_sz_max", + description: "XSAVE/XRSTOR area max byte size, all CPU features", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + // TODO: Do we know of any state components corresponding to the upper bits in XCR0? Perhaps it would be + // better to have `ProfilePolicy::Static(0)` here? + ValueDefinitions::new(&[ValueDefinition { + short: "xcr0_upper_bits", + description: "Reports the valid bit fields of the upper 32 bits of the XCR0 register", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + // =================================================================================================================== + // Processor Extended State Enumeration Sub-leaf 1 + // =================================================================================================================== + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "xsaveopt", + description: "XSAVEOPT instruction", + bits_range: (0, 0), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xsavec", + description: "XSAVEC instruction", + bits_range: (1, 1), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xgetbv1", + description: "XGETBV instruction with ECX = 1", + bits_range: (2, 2), + policy: ProfilePolicy::Inherit, + }, + // TODO: Can this have security implications in terms of supervisor state getting exposed? + ValueDefinition { + short: "xsaves", + description: "XSAVES/XRSTORS instructions (and XSS MSR)", + bits_range: (3, 3), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xfd", + description: "Extended feature disable support", + bits_range: (4, 4), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + /*NOTE: This will depend on which CPU features (in CHV) are enabled and pre-computation can potentially lead to a combinatorial explosion. 
Luckily we can deal with each component (and its size) separately, hence we can just passthrough whatever we get from the host here.*/ + ValueDefinition { + short: "xsave_sz_xcr0_xmms_enabled", + description: "XSAVE area size, all XCR0 and IA32_XSS features enabled", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::ECX, + }, + /* Reports the supported bits of the lower IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] = 1*/ + ValueDefinitions::new(&[ + ValueDefinition { + short: "xcr0_7bits", + description: "Used for XCR0", + bits_range: (0, 7), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xss_pt", + description: "PT state, supported", + bits_range: (8, 8), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xcr0_bit9", + description: "Used for XCR0", + bits_range: (9, 9), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xss_pasid", + description: "PASID state, supported", + bits_range: (10, 10), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xss_cet_u", + description: "CET user state, supported", + bits_range: (11, 11), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xss_cet_p", + description: "CET supervisor state, supported", + bits_range: (12, 12), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xss_hdc", + description: "HDC state, supported", + bits_range: (13, 13), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xss_uintr", + description: "UINTR state, supported", + bits_range: (14, 14), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xss_lbr", + description: "LBR state, supported", + bits_range: (15, 15), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xss_hwp", + description: "HWP state, supported", + bits_range: (16, 16), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xcr0_bits", + description: "Used for XCR0", + bits_range: (17, 18), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EDX, + }, + /* Reports the supported bits of the upper 32 bits of the IA32_XSS MSR. IA32_XSS[n + 32 ] can be set to 1 only if EDX[n] = 1*/ + ValueDefinitions::new(&[ValueDefinition { + short: "ia32_xss_upper", + description: " Reports the supported bits of the upper 32 bits of the IA32_XSS MSR. IA32_XSS[n + 32 ] can be set to 1 only if EDX[n] = 1", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + // =================================================================================================================== + // Processor Extended State Enumeration Sub-leaves + // =================================================================================================================== + + /* LEAF 0xd sub-leaf n >=2 : + If ECX contains an invalid sub-leaf index, EAX/EBX/ECX/EDX return 0. Sub-leaf n (0 ≤ n ≤ 31) is + invalid if sub-leaf 0 returns 0 in EAX[n] and sub-leaf 1 returns 0 in ECX[n]. Sub-leaf n (32 ≤ n ≤ 63) + is invalid if sub-leaf 0 returns 0 in EDX[n-32] and sub-leaf 1 returns 0 in EDX[n-32]. 
+ */ + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(2, 63), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "xsave_sz", + description: "Size of save area for subleaf-N feature, in bytes", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(2, 63), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "xsave_offset", + description: "Offset of save area for subleaf-N feature, in bytes", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(2, 63), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "is_xss_bit", + description: "Subleaf N describes an XSS bit, otherwise XCR0 bit", + bits_range: (0, 0), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "compacted_xsave_64byte_aligned", + description: "When compacted, subleaf-N feature XSAVE area is 64-byte aligned", + bits_range: (1, 1), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xfd_faulting", + description: "Indicates support for xfd faulting", + bits_range: (2, 2), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + // Intel MPX is deprecated hence we zero out these sub-leaves + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(3, 4), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "0xd-3-4-eax-mpx-zero", + description: "This leaf has been zeroed out because MPX state components are disabled", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(3, 4), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "0xd-3-4-ebx-mpx-zero", + description: "This leaf has been zeroed out because MPX state components are disabled", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(3, 4), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "0xd-3-4-ecx-mpx-zero", + description: "This leaf has been zeroed out because MPX state components are disabled", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(3, 4), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "0xd-3-4-edx-mpx-zero", + description: "This leaf has been zeroed out because MPX state components are disabled", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + // NOTE: Sub-leaves 17 & 18 are AMX related and we will alter the adjustments corresponding to + // the policy declared here at runtime for those values. 
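+        // (State component 17 is XTILECFG and 18 is XTILEDATA, i.e. the AMX tile state; as with
+        // the AMX bits in leaf 0x7, they are zeroed at runtime unless "amx" was opted into via
+        // CpuFeatures.)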
+ ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(5, 63), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "xsave_sz", + description: "Size of save area for subleaf-N feature, in bytes", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(5, 63), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "xsave_offset", + description: "Offset of save area for subleaf-N feature, in bytes", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0xd, + sub_leaf: RangeInclusive::new(5, 63), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "is_xss_bit", + description: "Subleaf N describes an XSS bit, otherwise XCR0 bit", + bits_range: (0, 0), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "compacted_xsave_64byte_aligned", + description: "When compacted, subleaf-N feature XSAVE area is 64-byte aligned", + bits_range: (1, 1), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "xfd_faulting", + description: "Indicates support for xfd faulting", + bits_range: (2, 2), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + // =================================================================================================================== + // Intel Resource Director Technology Monitoring Enumeration + // =================================================================================================================== + ( + Parameters { + leaf: 0xf, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "core_rmid_max", + description: "RMID max, within this core, all types (0-based)", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0xf, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "l3-cache-rdt-monitoring", + description: "Supports L3 Cache Intel RDT Monitoring if 1", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }]), + ), + // =================================================================================================================== + // Intel Resource Director Technology Monitoring Enumeration Sub-leaf 1 + // =================================================================================================================== + ( + Parameters { + leaf: 0xf, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "l3c_qm_bitwidth", + description: "L3 QoS-monitoring counter bitwidth (24-based)", + bits_range: (0, 7), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "l3c_qm_overflow_bit", + description: "QM_CTR MSR bit 61 is an overflow bit", + bits_range: (8, 8), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "l3c_qm_non_cpu_agent", + description: "If 1, indicates the presence of non-CPU agent Intel RDT CTM support", + bits_range: (9, 9), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "l3c_qm_non_cpu_agent", + description: "If 1, indicates the presence of non-CPU agent Intel RDT MBM support", + bits_range: (10, 10), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0xf, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EBX, + }, + 
ValueDefinitions::new(&[ValueDefinition { + short: "l3c_qm_conver_factor", + description: "QM_CTR MSR conversion factor to bytes", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0xf, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "l3c_qm_rmid_max", + description: "L3 QoS-monitoring max RMID", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0xf, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "cqm_occup_llc", + description: "L3 QoS occupancy monitoring supported", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "cqm_mbm_total", + description: "L3 QoS total bandwidth monitoring supported", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "cqm_mbm_local", + description: "L3 QoS local bandwidth monitoring supported", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // =================================================================================================================== + // Intel Resource Director Technology Allocation Enumeration + // =================================================================================================================== + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + //TODO: These features may be good for increased performance. Perhaps there needs to be some mechanism to opt-in for non-host CPU profiles? + ValueDefinitions::new(&[ + ValueDefinition { + short: "cat_l3", + description: "L3 Cache Allocation Technology supported", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "cat_l2", + description: "L2 Cache Allocation Technology supported", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "mba", + description: "Memory Bandwidth Allocation supported", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // =================================================================================================================== + // Intel Resource Director Technology Allocation Enumeration Sub-leaf (ECX = ResID = 1) + // =================================================================================================================== + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cat_cbm_len", + description: "L3_CAT capacity bitmask length, minus-one notation", + bits_range: (0, 4), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cat_units_bitmap", + description: "L3_CAT bitmap of allocation units", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::ECX, + }, + //TODO: These feature may be good for increased performance. Perhaps there needs to be some mechanism to opt-in for non-host CPU profiles? 
+ ValueDefinitions::new(&[ + ValueDefinition { + short: "l3_cat_non_cpu_agents", + description: "L3_CAT for non-CPU agent is supported", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "cdp_l3", + description: "L3/L2_CAT CDP (Code and Data Prioritization)", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "cat_sparse_1s", + description: "L3/L2_CAT non-contiguous 1s value supported", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EDX, + }, + // TODO: We might need some way to opt in to use Intel cache allocation technology in guests with non-host CPU profiles. + ValueDefinitions::new(&[ValueDefinition { + short: "cat_cos_max", + description: "Highest COS number supported for this ResID", + bits_range: (0, 15), + policy: ProfilePolicy::Static(0), + }]), + ), + // =================================================================================================================== + // Intel Resource Director Technology Allocation Enumeration Sub-leaf (ECX = ResID = 2) + // =================================================================================================================== + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(2, 2), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cat_cbm_len", + description: "L2_CAT capacity bitmask length, minus-one notation", + bits_range: (0, 4), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(2, 2), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cat_units_bitmap", + description: "L2_CAT bitmap of allocation units", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(2, 2), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cat_cos_max", + description: "Highest COS number supported for this ResID", + bits_range: (0, 15), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(2, 2), + register: CpuidReg::ECX, + }, + // TODO: We might need some way to opt in to use Intel cache allocation technology in guests with non-host CPU profiles. + ValueDefinitions::new(&[ + ValueDefinition { + short: "cdp_l2", + description: "L2_CAT CDP (Code and Data Prioritization)", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "cat_sparse_1s", + description: "L2_CAT non-contiguous 1s value supported", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // =================================================================================================================== + // Intel Resource Director Technology Allocation Enumeration Sub-leaf (ECX = ResID = 3) + // =================================================================================================================== + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(3, 3), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + // TODO: We might need some way to opt in to use Intel MBA technology in guests with non-host CPU profiles. 
+ ValueDefinition { + short: "mba_max_delay", + description: "Max MBA throttling value; minus-one notation", + bits_range: (0, 11), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(3, 3), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "per_thread_mba", + description: "Per-thread MBA controls are supported", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "mba_delay_linear", + description: "Delay values are linear", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(3, 3), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "mba_cos_max", + description: "MBA max Class of Service supported", + bits_range: (0, 15), + policy: ProfilePolicy::Static(0), + }]), + ), + // =================================================================================================================== + // Intel Resource Director Technology Allocation Enumeration Sub-leaf (ECX = ResID = 5) + // =================================================================================================================== + // + // TODO: We may want to have some way to opt-in to use Intel RDT for guests with non-host CPU profiles. + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(5, 5), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "core_max_throttle", + description: "Max Core throttling level supported by the corresponding ResID", + bits_range: (0, 7), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "core_scope", + description: "If 1, indicates the logical processor scope of the IA32_QoS_Core_BW_Thrtl_n MSRs. 
Other values are reserved", + bits_range: (8, 11), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(5, 5), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cba_delay_linear", + description: "The response of the bandwidth control is approximately linear", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0x10, + sub_leaf: RangeInclusive::new(5, 5), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "core_cos_max", + description: "Core max Class of Service supported", + bits_range: (0, 15), + policy: ProfilePolicy::Static(0), + }]), + ), + // SGX is already disabled and deprecated so we don't need to worry about leaf 0x12 and its subleaves + + // =================================================================================================================== + // Intel Processor Trace Enumeration Main Leaf + // =================================================================================================================== + ( + Parameters { + leaf: 0x14, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "pt_max_subleaf", + description: "Maximum leaf 0x14 subleaf", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0x14, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "cr3_filtering", + description: "IA32_RTIT_CR3_MATCH is accessible", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "psb_cyc", + description: "Configurable PSB and cycle-accurate mode", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "ip_filtering", + description: "IP/TraceStop filtering; Warm-reset PT MSRs preservation", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "mtc_timing", + description: "MTC timing packet; COFI-based packets suppression", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "ptwrite", + description: "PTWRITE support", + bits_range: (4, 4), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "power_event_trace", + description: "Power Event Trace support", + bits_range: (5, 5), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "psb_pmi_preserve", + description: "PSB and PMI preservation support", + bits_range: (6, 6), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "event_trace", + description: "Event Trace packet generation through IA32_RTIT_CTL.EventEn", + bits_range: (7, 7), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "tnt_disable", + description: "TNT packet generation disable through IA32_RTIT_CTL.DisTNT", + bits_range: (8, 8), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x14, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "topa_output", + description: "ToPA output scheme support", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "topa_multiple_entries", + description: "ToPA tables can hold multiple entries", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: 
"single_range_output", + description: "Single-range output scheme supported", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "trance_transport_output", + description: "Trace Transport subsystem output support", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "ip_payloads_lip", + description: "IP payloads have LIP values (CS base included)", + bits_range: (31, 31), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // =================================================================================================================== + // Intel Processor Trace Enumeration Sub-leaf 1 + // =================================================================================================================== + ( + Parameters { + leaf: 0x14, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "num_address_ranges", + description: "Filtering number of configurable Address Ranges", + bits_range: (0, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "mtc_periods_bmp", + description: "Bitmap of supported MTC period encodings", + bits_range: (16, 31), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x14, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "cycle_thresholds_bmp", + description: "Bitmap of supported Cycle Threshold encodings", + bits_range: (0, 15), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "psb_periods_bmp", + description: "Bitmap of supported Configurable PSB frequency encodings", + bits_range: (16, 31), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // =================================================================================================================== + // Time Stamp Counter and Core Crystal Clock Information + // =================================================================================================================== + ( + Parameters { + leaf: 0x15, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "tsc_denominator", + description: "Denominator of the TSC/'core crystal clock' ratio", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x15, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "tsc_numerator", + description: "Numerator of the TSC/'core crystal clock' ratio", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x15, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_crystal_hz", + description: "Core crystal clock nominal frequency, in Hz", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + // =================================================================================================================== + // Processor Frequency Information + // =================================================================================================================== + ( + Parameters { + leaf: 0x16, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_base_mhz", + description: "Processor base frequency, in MHz", + 
bits_range: (0, 15), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x16, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_max_mhz", + description: "Processor max frequency, in MHz", + bits_range: (0, 15), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x16, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "bus_mhz", + description: "Bus reference frequency, in MHz", + bits_range: (0, 15), + policy: ProfilePolicy::Passthrough, + }]), + ), + // =================================================================================================================== + // System-On-Chip Vendor Attribute Enumeration Main Leaf + // =================================================================================================================== + + // System-On-Chip should probably not be supported for CPU profiles for the foreseeable feature. + ( + Parameters { + leaf: 0x17, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "soc_max_subleaf", + description: "Maximum leaf 0x17 subleaf", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + // =================================================================================================================== + // Deterministic Address Translation Parameters + // =================================================================================================================== + ( + Parameters { + leaf: 0x18, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "tlb_max_subleaf", + description: "Maximum leaf 0x18 subleaf", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x18, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "tlb_4k_page", + description: "TLB 4KB-page entries supported", + bits_range: (0, 0), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "tlb_2m_page", + description: "TLB 2MB-page entries supported", + bits_range: (1, 1), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "tlb_4m_page", + description: "TLB 4MB-page entries supported", + bits_range: (2, 2), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "tlb_1g_page", + description: "TLB 1GB-page entries supported", + bits_range: (3, 3), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "hard_partitioning", + description: "(Hard/Soft) partitioning between logical CPUs sharing this structure", + bits_range: (8, 10), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "n_way_associative", + description: "Ways of associativity", + bits_range: (16, 31), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0x18, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "n_sets", + description: "Number of sets", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x18, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "tlb_type", + description: 
"Translation cache type (TLB type)", + bits_range: (0, 4), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "tlb_cache_level", + description: "Translation cache level (1-based)", + bits_range: (5, 7), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "is_fully_associative", + description: "Fully-associative structure", + bits_range: (8, 8), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "tlb_max_addressable_ids", + description: "Max number of addressable IDs for logical CPUs sharing this TLB - 1", + bits_range: (14, 25), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + // We don't support key locker for now (leaf 0x19): Hence we zero out leaf 0x19 for CPU profiles We zero LEAF + // 0x1A (Native Model ID Enumeration) out for CPU profiles LEAF 0x1B (PCONFIG) is zeroed out for CPU profiles + // for now + + // =================================================================================================================== + // Last Branch Records Information + // =================================================================================================================== + ( + Parameters { + leaf: 0x1c, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "lbr_depth_8", + description: "Max stack depth (number of LBR entries) = 8", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_depth_16", + description: "Max stack depth (number of LBR entries) = 16", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_depth_24", + description: "Max stack depth (number of LBR entries) = 24", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_depth_32", + description: "Max stack depth (number of LBR entries) = 32", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_depth_40", + description: "Max stack depth (number of LBR entries) = 40", + bits_range: (4, 4), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_depth_48", + description: "Max stack depth (number of LBR entries) = 48", + bits_range: (5, 5), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_depth_56", + description: "Max stack depth (number of LBR entries) = 56", + bits_range: (6, 6), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_depth_64", + description: "Max stack depth (number of LBR entries) = 64", + bits_range: (7, 7), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_deep_c_reset", + description: "LBRs maybe cleared on MWAIT C-state > C1", + bits_range: (30, 30), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_ip_is_lip", + description: "LBR IP contain Last IP, otherwise effective IP", + bits_range: (31, 31), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x1c, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "lbr_cpl", + description: "CPL filtering (non-zero IA32_LBR_CTL[2:1]) supported", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_branch_filter", + description: "Branch filtering (non-zero IA32_LBR_CTL[22:16]) supported", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: 
"lbr_call_stack", + description: "Call-stack mode (IA32_LBR_CTL[3] = 1) supported", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x1c, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "lbr_mispredict", + description: "Branch misprediction bit supported (IA32_LBR_x_INFO[63])", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_timed_lbr", + description: "Timed LBRs (CPU cycles since last LBR entry) supported", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_branch_type", + description: "Branch type field (IA32_LBR_INFO_x[59:56]) supported", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_events_gpc_bmp", + description: "LBR PMU-events logging support; bitmap for first 4 GP (general-purpose) Counters", + bits_range: (16, 19), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // =================================================================================================================== + // Tile Information Main Leaf + // =================================================================================================================== + // NOTE: AMX is opt-in, but there are no problems with inheriting these values. The CHV will take care of zeroing out the bits userspace applications should check for if the user did not opt-in to amx. + ( + Parameters { + leaf: 0x1d, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "amx_max_palette", + description: "Highest palette ID / subleaf ID", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + // =================================================================================================================== + // Tile Palette 1 Sub-leaf + // =================================================================================================================== + // NOTE: AMX is opt-in, but there are no problems with inheriting these values. The CHV will take care of zeroing out the bits userspace applications should check for if the user did not opt-in to amx. 
+ ( + Parameters { + leaf: 0x1d, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "amx_palette_size", + description: "AMX palette total tiles size, in bytes", + bits_range: (0, 15), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_tile_size", + description: "AMX single tile's size, in bytes", + bits_range: (16, 31), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + ( + Parameters { + leaf: 0x1d, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "amx_tile_row_size", + description: "AMX tile single row's size, in bytes", + bits_range: (0, 15), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_palette_nr_tiles", + description: "AMX palette number of tiles", + bits_range: (16, 31), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + ( + Parameters { + leaf: 0x1d, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "amx_tile_nr_rows", + description: "AMX tile max number of rows", + bits_range: (0, 15), + policy: ProfilePolicy::Inherit, + }]), + ), + // =================================================================================================================== + // TMUL Information Main Leaf + // =================================================================================================================== + // NOTE: AMX is opt-in, but there are no problems with inheriting these values. The CHV will take care of zeroing out the bits userspace applications should check for if the user did not opt-in to amx. + ( + Parameters { + leaf: 0x1e, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "tmul_info_max", + description: "Reports the maximum number of sub-leaves that are supported in leaf 0x1e", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x1e, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "tmul_maxk", + description: "TMUL unit maximum height, K (rows or columns)", + bits_range: (0, 7), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "tmul_maxn", + description: "TMUL unit maximum SIMD dimension, N (column bytes)", + bits_range: (8, 23), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + // =================================================================================================================== + // TMUL Information Sub-leaf 1 + // =================================================================================================================== + // NOTE: AMX is opt-in, but there are no problems with inheriting these values. The CHV will take care of zeroing out the bits userspace applications should check for if the user did not opt-in to amx. + ( + Parameters { + leaf: 0x1e, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EAX, + }, + // NOTE: AMX currently requires opt-in, even for the host CPU profile. We still inherit this value for profiles as the relevant feature bits that userspace applications must check will be zeroed out if the user has not opted in for "amx" via CpuFeatures. 
+ ValueDefinitions::new(&[ + ValueDefinition { + short: "amx_int8", + description: "If 1, the processor supports tile computational operations on 8-bit integers", + bits_range: (0, 0), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_bf16", + description: "If 1, the processor supports tile computational operations on bfloat16 numbers", + bits_range: (1, 1), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_complex", + description: "If 1, the processor supports the AMX-COMPLEX instructions", + bits_range: (2, 2), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_fp16", + description: "If 1, the processor supports tile computational operations on FP16 numbers", + bits_range: (3, 3), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_fp8", + description: "If 1, the processor supports tile computational operations on FP8 numbers", + bits_range: (4, 4), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_transpose", + description: "If 1, the processor supports the AMX-TRANSPOSE instructions", + bits_range: (5, 5), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_tf32", + description: "If 1, the processor supports the AMX-TF32 (FP19) instructions", + bits_range: (6, 6), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_avx512", + description: "If 1, the processor supports the AMX-AVX512 instructions", + bits_range: (7, 7), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "amx_movrs", + description: "If 1, the processor supports the AMX-MOVRS instructions", + bits_range: (8, 8), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + // =================================================================================================================== + // V2 Extended Topology Enumeration + // =================================================================================================================== + + // The values in leaf 0x1f must be set by CHV itself. 
+ ( + Parameters { + leaf: 0x1f, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "x2apic_id_shift", + description: "Bit width of this level (previous levels inclusive)", + bits_range: (0, 4), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x1f, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "domain_lcpus_count", + description: "Logical CPUs count across all instances of this domain", + bits_range: (0, 15), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x1f, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "domain_level", + description: "This domain level (subleaf ID)", + bits_range: (0, 7), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "domain_type", + description: "This domain type", + bits_range: (8, 15), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0x1f, + sub_leaf: RangeInclusive::new(0, u32::MAX), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "x2apic_id", + description: "x2APIC ID of current logical CPU", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + // =================================================================================================================== + // Processor History Reset + // =================================================================================================================== + ( + Parameters { + leaf: 0x20, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "hreset_nr_subleaves", + description: "CPUID 0x20 max subleaf + 1", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x20, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "hreset_thread_director", + description: "HRESET of Intel thread director is supported", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }]), + ), + // =================================================================================================================== + // TDX + // =================================================================================================================== + + // TDX is not supported by CPU profiles for now. We just zero out this leaf for CPU profiles for the time being. 
+ ( + Parameters { + leaf: 0x21, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "tdx_vendorid_0", + description: "TDX vendor ID string bytes 0 - 3", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0x21, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "tdx_vendorid_2", + description: "TDX vendor ID string bytes 8 - 11", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0x21, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "tdx_vendorid_1", + description: "TDX vendor ID string bytes 4 - 7", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + // =================================================================================================================== + // Architectural Performance Monitoring Extended Main Leaf + // =================================================================================================================== + ( + Parameters { + leaf: 0x23, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "subleaf_0", + description: "If 1, subleaf 0 exists", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "subleaf_1", + description: "If 1, subleaf 1 exists", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "subleaf_2", + description: "If 1, subleaf 2 exists", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "subleaf_3", + description: "If 1, subleaf 3 exists", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "subleaf_4", + description: "If 1, subleaf 4 exists", + bits_range: (4, 4), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "subleaf_5", + description: "If 1, subleaf 5 exists. The processor supports Architectural PEBS. The IA32_PEBS_BASE and IA32_PEBS_INDEX MSRs exist", + bits_range: (5, 5), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x23, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "unitmask2", + description: "IA32_PERFEVTSELx MSRs UnitMask2 is supported", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "eq_bit", + description: "Equal flag in the IA32_PERFEVTSELx MSR is supported", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "RDPMC_USR_DISABLE", + description: "RDPMC_USR_DISABLE", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + ( + Parameters { + leaf: 0x23, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "num_slots_per_cycle", + description: "Number of slots per cycle.
This number can be multiplied by the number of cycles (from CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.CORE or IA32_FIXED_CTR1) to determine the total number of slots", + bits_range: (0, 7), + policy: ProfilePolicy::Static(0), + }]), + ), + // =================================================================================================================== + // Architectural Performance Monitoring Extended Sub-leaf 1 + // =================================================================================================================== + ( + Parameters { + leaf: 0x23, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "pmu_gp_counters_bitmap", + description: "General-purpose PMU counters bitmap", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0x23, + sub_leaf: RangeInclusive::new(1, 1), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "pmu_f_counters_bitmap", + description: "Fixed PMU counters bitmap", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + // =================================================================================================================== + // Architectural Performance Monitoring Extended Sub-leaf 2 + // =================================================================================================================== + ( + Parameters { + leaf: 0x23, + sub_leaf: RangeInclusive::new(2, 2), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "pmu_acr_bitmap", + description: "Bitmap of Auto Counter Reload (ACR) general-purpose counters that can be reloaded", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + // =================================================================================================================== + // Architectural Performance Monitoring Extended Sub-leaf 3 + // =================================================================================================================== + ( + Parameters { + leaf: 0x23, + sub_leaf: RangeInclusive::new(3, 3), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "core_cycles_evt", + description: "Core cycles event supported", + bits_range: (0, 0), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "insn_retired_evt", + description: "Instructions retired event supported", + bits_range: (1, 1), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "ref_cycles_evt", + description: "Reference cycles event supported", + bits_range: (2, 2), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "llc_refs_evt", + description: "Last-level cache references event supported", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "llc_misses_evt", + description: "Last-level cache misses event supported", + bits_range: (4, 4), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "br_insn_ret_evt", + description: "Branch instruction retired event supported", + bits_range: (5, 5), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "br_mispr_evt", + description: "Branch mispredict retired event supported", + bits_range: (6, 6), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "td_slots_evt", + description: "Topdown slots event supported", + bits_range: (7, 7), + policy: ProfilePolicy::Static(0), + }, + 
ValueDefinition { + short: "td_backend_bound_evt", + description: "Topdown backend bound event supported", + bits_range: (8, 8), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "td_bad_spec_evt", + description: "Topdown bad speculation event supported", + bits_range: (9, 9), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "td_frontend_bound_evt", + description: "Topdown frontend bound event supported", + bits_range: (10, 10), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "td_retiring_evt", + description: "Topdown retiring event supported", + bits_range: (11, 11), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr_inserts", + description: "LBR inserts event supported", + bits_range: (12, 12), + policy: ProfilePolicy::Static(0), + }, + ]), + ), + // =================================================================================================================== + // Architectural Performance Monitoring Extended Sub-leaf 4 + // =================================================================================================================== + ( + Parameters { + leaf: 0x23, + sub_leaf: RangeInclusive::new(4, 4), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "allow_in_record", + description: "If 1, indicates that the ALLOW_IN_RECORD bit is available in the IA32_PMC_GPn_CFG_C and IA32_PMC_FXm_CFG_C MSRs", + bits_range: (3, 3), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "cntr", + description: "Counters group sub-groups (general-purpose counters, fixed-function counters, and performance metrics) are available", + bits_range: (0, 7), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "lbr", + description: "LBR group and both bits [41:40] are available", + bits_range: (8, 9), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "xer", + description: "These bits correspond to XER group bits [55:49]", + bits_range: (17, 23), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "grp", + description: "If 1, the GRP group is available", + bits_range: (29, 29), + policy: ProfilePolicy::Static(0), + }, + ValueDefinition { + short: "aux", + description: "If 1, the AUX group is available", + bits_range: (30, 30), + policy: ProfilePolicy::Static(0), + }, + ]), + ),
+ // =================================================================================================================== + // Architectural Performance Monitoring Extended Sub-leaf 5 + // =================================================================================================================== + ( + Parameters { + leaf: 0x23, + sub_leaf: RangeInclusive::new(5, 5), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "architectural_pebs_counters", + description: "General-purpose counters support Architectural PEBS. Bit vector of general-purpose counters for which the Architectural PEBS mechanism is available", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0x23, + sub_leaf: RangeInclusive::new(5, 5), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "pebs_pdist_counters", + description: "General-purpose counters for which PEBS supports PDIST", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0x23, + sub_leaf: RangeInclusive::new(5, 5), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "pebs_fixed_function_counters", + description: "Fixed-function counters support Architectural PEBS. Bit vector of fixed-function counters for which the Architectural PEBS mechanism is available. If ECX[x] == 1, then the IA32_PMC_FXm_CFG_C MSR is available, and PEBS is supported", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + ( + Parameters { + leaf: 0x23, + sub_leaf: RangeInclusive::new(5, 5), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "pebs_fixed_function_pdist_counters", + description: "Fixed-function counters for which PEBS supports PDIST", + bits_range: (0, 31), + policy: ProfilePolicy::Static(0), + }]), + ), + // =================================================================================================================== + // Converged Vector ISA Main Leaf + // =================================================================================================================== + ( + Parameters { + leaf: 0x24, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "converged_vector_isa_max_sub_leaves", + description: "Reports the maximum number of sub-leaves that are supported in leaf 0x24", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x24, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "avx_10_version", + description: "Reports the Intel AVX10 Converged Vector ISA version", + bits_range: (0, 7), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "avx_10_lengths", + description: "Reserved at 111", + bits_range: (16, 18), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + // Hypervisor reserved CPUID leaves are set elsewhere + + // =================================================================================================================== + // Extended Function CPUID Information + // =================================================================================================================== + ( + Parameters { + leaf: 0x80000000, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, +
ValueDefinitions::new(&[ValueDefinition { + short: "max_ext_leaf", + description: "Maximum extended CPUID leaf supported", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000000, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_vendorid_0", + description: "Vendor ID string bytes 0 - 3", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x80000000, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_vendorid_2", + description: "Vendor ID string bytes 8 - 11", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x80000000, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_vendorid_1", + description: "Vendor ID string bytes 4 - 7", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x80000001, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + // TODO: Would inherit be better than passthrough? Currently CHV manually copies these over from the host ... + ValueDefinitions::new(&[ + ValueDefinition { + short: "e_stepping_id", + description: "Stepping ID", + bits_range: (0, 3), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "e_base_model", + description: "Base processor model", + bits_range: (4, 7), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "e_base_family", + description: "Base processor family", + bits_range: (8, 11), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "e_base_type", + description: "Base processor type (Transmeta)", + bits_range: (12, 13), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "e_ext_model", + description: "Extended processor model", + bits_range: (16, 19), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "e_ext_family", + description: "Extended processor family", + bits_range: (20, 27), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0x80000001, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "brand_id", + description: "Brand ID", + bits_range: (0, 15), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "pkg_type", + description: "Package type", + bits_range: (28, 31), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0x80000001, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "lahf_lm", + description: "LAHF and SAHF in 64-bit mode", + bits_range: (0, 0), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "lzcnt", + description: "LZCNT advanced bit manipulation", + bits_range: (5, 5), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "prefetchw", + description: "3DNow PREFETCH/PREFETCHW support", + bits_range: (8, 8), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + ( + Parameters { + leaf: 0x80000001, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "syscall", + description: "SYSCALL and SYSRET instructions", + bits_range: (11, 11), + policy: 
ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "nx", + description: "Execute Disable Bit available", + bits_range: (20, 20), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "pdpe1gb", + description: "1-GB large page support", + bits_range: (26, 26), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "rdtscp", + description: "RDTSCP instruction and IA32_TSC_AUX are available", + bits_range: (27, 27), + policy: ProfilePolicy::Inherit, + }, + ValueDefinition { + short: "lm", + description: "Long mode (x86-64, 64-bit support)", + bits_range: (29, 29), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + // The profile generation tool will actually modify the brand id string before + // acting on the policy set here. + ( + Parameters { + leaf: 0x80000002, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_brandid_0", + description: "CPU brand ID string, bytes 0 - 3", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000002, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_brandid_1", + description: "CPU brand ID string, bytes 4 - 7", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000002, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_brandid_2", + description: "CPU brand ID string, bytes 8 - 11", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000002, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_brandid_3", + description: "CPU brand ID string, bytes 12 - 15", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000003, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_brandid_4", + description: "CPU brand ID string bytes, 16 - 19", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000003, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_brandid_5", + description: "CPU brand ID string bytes, 20 - 23", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000003, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_brandid_6", + description: "CPU brand ID string bytes, 24 - 27", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000003, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_brandid_7", + description: "CPU brand ID string bytes, 28 - 31", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000004, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_brandid_8", + description: "CPU brand ID string, bytes 32 - 35", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000004, + sub_leaf: 
RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_brandid_9", + description: "CPU brand ID string, bytes 36 - 39", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000004, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_brandid_10", + description: "CPU brand ID string, bytes 40 - 43", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000004, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "cpu_brandid_11", + description: "CPU brand ID string, bytes 44 - 47", + bits_range: (0, 31), + policy: ProfilePolicy::Inherit, + }]), + ), + ( + Parameters { + leaf: 0x80000006, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "l2_line_size", + description: "L2 cache line size, in bytes", + bits_range: (0, 7), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "l2_nlines", + description: "L2 cache number of lines per tag", + bits_range: (8, 11), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "l2_assoc", + description: "L2 cache associativity", + bits_range: (12, 15), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "l2_size_kb", + description: "L2 cache size, in KB", + bits_range: (16, 31), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + // EAX, EBX and ECX of 0x8000_0007 are all reserved (=0) on Intel + ( + Parameters { + leaf: 0x80000007, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ + // TODO: We may want some mechanism to let users opt-in to using an invariant TSC provided by the hardware (when available). 
+ // TODO: Probably unconditionally set by CHV + ValueDefinition { + short: "constant_tsc", + description: "TSC ticks at constant rate across all P and C states", + bits_range: (8, 8), + policy: ProfilePolicy::Inherit, + }, + ]), + ), + ( + Parameters { + leaf: 0x80000008, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "phys_addr_bits", + description: "Max physical address bits", + bits_range: (0, 7), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "virt_addr_bits", + description: "Max virtual address bits", + bits_range: (8, 15), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "guest_phys_addr_bits", + description: "Max nested-paging guest physical address bits", + bits_range: (16, 23), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0x80000008, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "wbnoinvd", + description: "WBNOINVD supported", + bits_range: (9, 9), + policy: ProfilePolicy::Static(0), + }]), + ), + ]) +}; diff --git a/arch/src/x86_64/cpuid_definitions/mod.rs b/arch/src/x86_64/cpuid_definitions/mod.rs index bff3cc6a4b..1654aa699f 100644 --- a/arch/src/x86_64/cpuid_definitions/mod.rs +++ b/arch/src/x86_64/cpuid_definitions/mod.rs @@ -10,6 +10,8 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::x86_64::CpuidReg; +pub mod intel; + pub(in crate::x86_64) fn serialize_as_hex( input: &u32, serializer: S, From 01014c4a094383523122e06f121cda97feb98987 Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Wed, 10 Dec 2025 11:13:01 +0100 Subject: [PATCH 293/294] arch: KVM CPUID definitions We introduce CPUID definitions defined for the KVM hypervisor. These definitions will later be utilized by the upcoming CPU profile generation tool. Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- arch/src/x86_64/cpuid_definitions/kvm.rs | 204 +++++++++++++++++++++++ arch/src/x86_64/cpuid_definitions/mod.rs | 2 + 2 files changed, 206 insertions(+) create mode 100644 arch/src/x86_64/cpuid_definitions/kvm.rs diff --git a/arch/src/x86_64/cpuid_definitions/kvm.rs b/arch/src/x86_64/cpuid_definitions/kvm.rs new file mode 100644 index 0000000000..89285b2aa4 --- /dev/null +++ b/arch/src/x86_64/cpuid_definitions/kvm.rs @@ -0,0 +1,204 @@ +//! This module contains CPUID definitions for the KVM hypervisor. + +use std::ops::RangeInclusive; + +use crate::x86_64::CpuidReg; +use crate::x86_64::cpuid_definitions::{ + CpuidDefinitions, Parameters, ProfilePolicy, ValueDefinition, ValueDefinitions, +}; + +/// CPUID features defined for the KVM hypervisor. 
+/// +/// See https://www.kernel.org/doc/html/latest/virt/kvm/x86/cpuid.html +pub const KVM_CPUID_DEFINITIONS: CpuidDefinitions<6> = const { + CpuidDefinitions([ + //===================================================================== + // KVM CPUID Signature + // =================================================================== + ( + Parameters { + leaf: 0x4000_0000, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "max_hypervisor_leaf", + description: "The maximum valid leaf between 0x4000_0000 and 0x4FFF_FFF", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x4000_0000, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EBX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "hypervisor_string_ebx", + description: "Part of the hypervisor string", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x4000_0000, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::ECX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "hypervisor_string_ecx", + description: "Part of the hypervisor string", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + ( + Parameters { + leaf: 0x4000_0000, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "hypervisor_string_edx", + description: "Part of the hypervisor string", + bits_range: (0, 31), + policy: ProfilePolicy::Passthrough, + }]), + ), + //===================================================================== + // KVM CPUID Features + // =================================================================== + ( + Parameters { + leaf: 0x4000_0001, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EAX, + }, + ValueDefinitions::new(&[ + ValueDefinition { + short: "kvm_feature_clocksource", + description: "kvmclock available at MSRs 0x11 and 0x12", + bits_range: (0, 0), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_nop_io_delay", + description: "Not necessary to perform delays on PIO operations", + bits_range: (1, 1), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_mmu_op", + description: "Deprecated", + bits_range: (2, 2), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_clocksource2", + description: "kvmclock available at MSRs 0x4b564d00 and 0x4b564d01", + bits_range: (3, 3), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_async_pf", + description: "async pf can be enabled by writing to MSR 0x4b564d02", + bits_range: (4, 4), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_steal_time", + description: "steal time can be enabled by writing to msr 0x4b564d03", + bits_range: (5, 5), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_pv_eoi", + description: "paravirtualized end of interrupt handler can be enabled by writing to msr 0x4b564d04", + bits_range: (6, 6), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_pv_unhalt", + description: "guest checks this feature bit before enabling paravirtualized spinlock support", + bits_range: (7, 7), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_pv_tlb_flush", + description: "guest checks this feature bit before enabling 
paravirtualized tlb flush", + bits_range: (9, 9), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_async_pf_vmexit", + description: "paravirtualized async PF VM EXIT can be enabled by setting bit 2 when writing to msr 0x4b564d02", + bits_range: (10, 10), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_pv_send_ipi", + description: "guest checks this feature bit before enabling paravirtualized send IPIs", + bits_range: (11, 11), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_poll_control", + description: "host-side polling on HLT can be disabled by writing to msr 0x4b564d05.", + bits_range: (12, 12), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_pv_sched_yield", + description: "guest checks this feature bit before using paravirtualized sched yield.", + bits_range: (13, 13), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_async_pf_int", + description: "guest checks this feature bit before using the second async pf control msr 0x4b564d06 and async pf acknowledgment msr 0x4b564d07.", + bits_range: (14, 14), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_msi_ext_dest_id", + description: "guest checks this feature bit before using extended destination ID bits in MSI address bits 11-5.", + bits_range: (15, 15), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_hc_map_gpa_range", + description: "guest checks this feature bit before using the map gpa range hypercall to notify the page state change", + bits_range: (16, 16), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_migration_control", + description: "guest checks this feature bit before using MSR_KVM_MIGRATION_CONTROL", + bits_range: (17, 17), + policy: ProfilePolicy::Passthrough, + }, + ValueDefinition { + short: "kvm_feature_clocksource_stable_bit", + description: "host will warn if no guest-side per-cpu warps are expected in kvmclock", + bits_range: (24, 24), + policy: ProfilePolicy::Passthrough, + }, + ]), + ), + ( + Parameters { + leaf: 0x4000_0001, + sub_leaf: RangeInclusive::new(0, 0), + register: CpuidReg::EDX, + }, + ValueDefinitions::new(&[ValueDefinition { + short: "kvm_hints_realtime", + description: "guest checks this feature bit to determine that vCPUs are never preempted for an unlimited time allowing optimizations", + bits_range: (0, 0), + policy: ProfilePolicy::Passthrough, + }]), + ), + ]) +}; diff --git a/arch/src/x86_64/cpuid_definitions/mod.rs b/arch/src/x86_64/cpuid_definitions/mod.rs index 1654aa699f..ee62550d80 100644 --- a/arch/src/x86_64/cpuid_definitions/mod.rs +++ b/arch/src/x86_64/cpuid_definitions/mod.rs @@ -11,6 +11,8 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::x86_64::CpuidReg; pub mod intel; +#[cfg(feature = "kvm")] +pub mod kvm; pub(in crate::x86_64) fn serialize_as_hex( input: &u32, From 1df5f84fd2c1f84c5deda0cebfbf6ecc2fd8b1d6 Mon Sep 17 00:00:00 2001 From: Oliver Anderson Date: Wed, 10 Dec 2025 12:42:00 +0100 Subject: [PATCH 294/294] arch: Improve CPUID incompatibility logging We use the Intel CPUID definitions to provide more information when CPUID compatibility checks fail (when both the source and destination VM run on Intel CPUs). 
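For example, if the source VM advertises the NX bit (leaf 0x80000001, EDX bit 20, the nx value definition) while the destination VM does not, the failure is now logged against that named definition together with its masked source and destination values, in addition to the existing register-level log.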
Signed-off-by: Oliver Anderson On-behalf-of: SAP oliver.anderson@sap.com --- arch/src/x86_64/mod.rs | 60 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index 7d7327d77b..a2e14c4b1d 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -528,8 +528,60 @@ impl CpuidFeatureEntry { let src_vm_features = Self::get_features_from_cpuid(src_vm_cpuid, feature_entry_list); let dest_vm_features = Self::get_features_from_cpuid(dest_vm_cpuid, feature_entry_list); - // Loop on feature bit and check if the 'source vm' feature is a subset - // of those of the 'destination vm' feature + // If both processors are Intel then we can use the existing Intel CPUID definitions to log more + // precise information about potential errors + let both_intel = { + // Check if the vendor string is "GenuineIntel". This assumes that `leaf_0` is the entry + // corresponding to CPUID leaf 0. + let is_intel = |leaf_0: &CpuIdEntry| { + leaf_0.ebx == 0x756e_6547 && leaf_0.ecx == 0x6c65_746e && leaf_0.edx == 0x4965_6e69 + }; + let src_0 = src_vm_cpuid + .iter() + .find(|entry| (entry.function == 0x0) & (entry.index == 0x0)); + let dest_0 = dest_vm_cpuid + .iter() + .find(|entry| (entry.function == 0x0) & (entry.index == 0x0)); + src_0 + .zip(dest_0) + .is_some_and(|(src, dest)| is_intel(src) & is_intel(dest)) + }; + let extra_reporting = |entry: &CpuidFeatureEntry, src_reg: u32, dest_reg: u32| { + if let Some((_, defs)) = cpuid_definitions::intel::INTEL_CPUID_DEFINITIONS + .as_slice() + .iter() + .find(|(param, _)| { + (param.leaf == entry.function) && (param.sub_leaf.contains(&entry.index)) + }) + { + for def in defs.as_slice() { + let mask = (def.bits_range.0..=def.bits_range.1) + .fold(0, |acc, next| acc | (1 << next)); + + let src_val = src_reg & mask; + let dest_val = dest_reg & mask; + + let is_compatible = match entry.compatible_check { + CpuidCompatibleCheck::BitwiseSubset => (src_val & (!dest_val)) == 0, + CpuidCompatibleCheck::NumNotGreater => src_val <= dest_val, + CpuidCompatibleCheck::Equal => src_val == dest_val, + }; + if !is_compatible { + info!( + "CPUID incompatibility for value definition='{:?}' detected in leaf={:#02x}, sub-leaf={:#02x}, register={:?}, compatibility_check={:?}, source VM value='{:#04x}' destination VM value='{:#04x}'", + def, + entry.function, + entry.index, + entry.feature_reg, + entry.compatible_check, + src_val, + dest_val + ); + } + } + } + }; + let mut compatible = true; for (i, (src_vm_feature, dest_vm_feature)) in src_vm_features .iter() @@ -557,7 +609,9 @@ impl CpuidFeatureEntry { src_vm_feature, dest_vm_feature ); - + if both_intel { + extra_reporting(entry, *src_vm_feature, *dest_vm_feature); + } compatible = false; } }
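Note: the check added above reduces to a per-definition mask-and-compare over a 32-bit register. The following standalone sketch illustrates that logic under simplified assumptions; CheckMode, bits_mask and is_compatible are illustrative stand-ins, not the crate's CpuidCompatibleCheck/ValueDefinition API.

// Standalone sketch of the per-definition compatibility check performed by
// `extra_reporting` above. All names here are illustrative stand-ins.

/// Mirror of the three compatibility modes referenced in the patch.
enum CheckMode {
    BitwiseSubset,
    NumNotGreater,
    Equal,
}

/// Build a mask covering the inclusive bit range lo..=hi of a 32-bit register.
fn bits_mask(lo: u32, hi: u32) -> u32 {
    (lo..=hi).fold(0u32, |acc, bit| acc | (1u32 << bit))
}

/// Check whether the masked source value is compatible with the masked
/// destination value under the given mode.
fn is_compatible(mode: &CheckMode, src_reg: u32, dest_reg: u32, lo: u32, hi: u32) -> bool {
    let mask = bits_mask(lo, hi);
    let (src_val, dest_val) = (src_reg & mask, dest_reg & mask);
    match mode {
        // Every bit set on the source must also be set on the destination.
        CheckMode::BitwiseSubset => (src_val & !dest_val) == 0,
        // A numeric capability (e.g. an address width) must not shrink.
        CheckMode::NumNotGreater => src_val <= dest_val,
        // The value must match exactly.
        CheckMode::Equal => src_val == dest_val,
    }
}

fn main() {
    // Physical address width (leaf 0x80000008, EAX bits 0..=7): 46 bits on the
    // source is compatible with 48 bits on the destination.
    assert!(is_compatible(&CheckMode::NumNotGreater, 46, 48, 0, 7));
    // NX (leaf 0x80000001, EDX bit 20): present on the source but missing on
    // the destination is reported as incompatible.
    assert!(!is_compatible(&CheckMode::BitwiseSubset, 1 << 20, 0, 20, 20));
    // Family/model-style fields typically require an exact match.
    assert!(is_compatible(&CheckMode::Equal, 0x6, 0x6, 0, 3));
}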