From 4d4ecb1b65f5fc31e85f640013cd181a4463eb0b Mon Sep 17 00:00:00 2001 From: Changyuan Lyu Date: Mon, 15 Dec 2025 18:05:13 -0800 Subject: [PATCH 1/4] refactor(kvm): handle check/enable capability errors Signed-off-by: Changyuan Lyu --- alioth/src/hv/kvm/kvm.rs | 18 ++++++------- alioth/src/hv/kvm/vcpu/vcpu_aarch64.rs | 5 ++-- alioth/src/hv/kvm/vm/vm.rs | 35 +++++++++++++------------- alioth/src/hv/kvm/vm/vm_x86_64.rs | 31 ++++++++--------------- alioth/src/sys/linux/kvm.rs | 2 -- 5 files changed, 36 insertions(+), 55 deletions(-) diff --git a/alioth/src/hv/kvm/kvm.rs b/alioth/src/hv/kvm/kvm.rs index 38ba6b36..97e37701 100644 --- a/alioth/src/hv/kvm/kvm.rs +++ b/alioth/src/hv/kvm/kvm.rs @@ -49,7 +49,7 @@ use crate::hv::{Hypervisor, MemMapOption, Result, VmConfig, error}; use crate::sys::kvm::KvmDevType; #[cfg(target_arch = "x86_64")] use crate::sys::kvm::kvm_get_supported_cpuid; -use crate::sys::kvm::{KVM_API_VERSION, kvm_get_api_version}; +use crate::sys::kvm::{KVM_API_VERSION, KvmCap, kvm_get_api_version}; #[cfg(target_arch = "x86_64")] use crate::sys::kvm::{KVM_MAX_CPUID_ENTRIES, KvmCpuid2, KvmCpuid2Flag, KvmCpuidEntry2}; @@ -80,16 +80,12 @@ pub enum KvmError { MmapOption { option: MemMapOption }, #[snafu(display("Failed to mmap a VCPU fd"))] MmapVcpuFd { error: std::io::Error }, - #[snafu(display("Failed to check extension {ext}"))] - CheckExtension { - ext: &'static str, - error: std::io::Error, - }, - #[snafu(display("Failed to enable capability {cap}"))] - EnableCap { - cap: &'static str, - error: std::io::Error, - }, + #[snafu(display("Failed to check KVM capability"))] + CheckCap { error: std::io::Error }, + #[snafu(display("KVM Capability {ext:?} not supported"))] + NotSupported { ext: KvmCap }, + #[snafu(display("Failed to enable capability {cap:?}"))] + EnableCap { cap: KvmCap, error: std::io::Error }, #[snafu(display("Failed to create guest memfd"))] GuestMemfd { error: std::io::Error }, #[cfg(target_arch = "aarch64")] diff --git 
a/alioth/src/hv/kvm/vcpu/vcpu_aarch64.rs b/alioth/src/hv/kvm/vcpu/vcpu_aarch64.rs index 563484f9..a6001733 100644 --- a/alioth/src/hv/kvm/vcpu/vcpu_aarch64.rs +++ b/alioth/src/hv/kvm/vcpu/vcpu_aarch64.rs @@ -53,9 +53,8 @@ impl KvmVcpu { pub fn kvm_vcpu_init(&mut self, is_bsp: bool) -> Result<()> { let mut arm_cpu_init = unsafe { kvm_arm_preferred_target(&self.vm.fd) }.context(error::CreateVcpu)?; - if self.vm.check_extension(KvmCap::ARM_PSCI_0_2)? == 1 { - arm_cpu_init.features[0] |= KvmArmVcpuFeature::PSCI_0_2.bits(); - } + self.vm.check_extension(KvmCap::ARM_PSCI_0_2)?; + arm_cpu_init.features[0] |= KvmArmVcpuFeature::PSCI_0_2.bits(); if !is_bsp { arm_cpu_init.features[0] |= KvmArmVcpuFeature::POWER_OFF.bits(); } diff --git a/alioth/src/hv/kvm/vm/vm.rs b/alioth/src/hv/kvm/vm/vm.rs index 0c16a082..3a019533 100644 --- a/alioth/src/hv/kvm/vm/vm.rs +++ b/alioth/src/hv/kvm/vm/vm.rs @@ -46,13 +46,14 @@ use crate::sys::kvm::KVM_IRQCHIP_IOAPIC; #[cfg(target_arch = "aarch64")] use crate::sys::kvm::KvmMsiFlag; use crate::sys::kvm::{ - KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, KvmCap, KvmEncRegion, KvmIoEventFd, + KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, KvmCap, KvmEnableCap, KvmEncRegion, KvmIoEventFd, KvmIoEventFdFlag, KvmIrqRouting, KvmIrqRoutingEntry, KvmIrqRoutingIrqchip, KvmIrqRoutingMsi, KvmIrqfd, KvmIrqfdFlag, KvmMemFlag, KvmMemoryAttribute, KvmMemoryAttributes, KvmMsi, KvmUserspaceMemoryRegion, KvmUserspaceMemoryRegion2, kvm_check_extension, kvm_create_vm, - kvm_get_vcpu_mmap_size, kvm_ioeventfd, kvm_irqfd, kvm_memory_encrypt_reg_region, - kvm_memory_encrypt_unreg_region, kvm_set_gsi_routing, kvm_set_memory_attributes, - kvm_set_user_memory_region, kvm_set_user_memory_region2, kvm_signal_msi, + kvm_enable_cap, kvm_get_vcpu_mmap_size, kvm_ioeventfd, kvm_irqfd, + kvm_memory_encrypt_reg_region, kvm_memory_encrypt_unreg_region, kvm_set_gsi_routing, + kvm_set_memory_attributes, kvm_set_user_memory_region, kvm_set_user_memory_region2, + kvm_signal_msi, }; 
#[cfg(target_arch = "aarch64")] @@ -122,16 +123,19 @@ impl VmInner { Ok(()) } - pub fn check_extension(&self, id: KvmCap) -> Result { - let ret = unsafe { kvm_check_extension(&self.fd, id) }; - match ret { - Ok(num) => Ok(num), - Err(_) => error::Capability { - cap: "KVM_CAP_CHECK_EXTENSION_VM", - } - .fail(), + pub fn check_extension(&self, id: KvmCap) -> Result { + let ret = unsafe { kvm_check_extension(&self.fd, id) }.context(kvm_error::CheckCap)?; + if ret == 0 { + kvm_error::NotSupported { ext: id }.fail() + } else { + Ok(ret) } } + + pub fn enable_cap(&self, cap: &KvmEnableCap) -> Result<(), KvmError> { + unsafe { kvm_enable_cap(&self.fd, cap) }.context(kvm_error::EnableCap { cap: cap.cap })?; + Ok(()) + } } impl Display for VmInner { @@ -665,12 +669,7 @@ impl Vm for KvmVm { if self.vm.pin_map.fetch_or(pin_flag, Ordering::AcqRel) & pin_flag == pin_flag { return Err(std::io::ErrorKind::AlreadyExists.into()).context(error::CreateIrq { pin }); } - if self.vm.check_extension(KvmCap::IRQFD)? 
== 0 { - return error::Capability { - cap: "KVM_CAP_IRQFD", - } - .fail(); - } + self.vm.check_extension(KvmCap::IRQFD)?; let event_fd = ffi!(unsafe { eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK) }) .context(error::CreateIrq { pin })?; let request = KvmIrqfd { diff --git a/alioth/src/hv/kvm/vm/vm_x86_64.rs b/alioth/src/hv/kvm/vm/vm_x86_64.rs index 29a176ac..73db3d3c 100644 --- a/alioth/src/hv/kvm/vm/vm_x86_64.rs +++ b/alioth/src/hv/kvm/vm/vm_x86_64.rs @@ -21,9 +21,8 @@ use crate::hv::kvm::sev::SevFd; use crate::hv::kvm::{KvmError, KvmVm, kvm_error}; use crate::hv::{Coco, Kvm, Result, VmConfig, error}; use crate::sys::kvm::{ - KvmCap, KvmCreateGuestMemfd, KvmEnableCap, KvmVmType, kvm_check_extension, - kvm_create_guest_memfd, kvm_create_irqchip, kvm_enable_cap, kvm_memory_encrypt_op, - kvm_set_identity_map_addr, kvm_set_tss_addr, + KvmCap, KvmCreateGuestMemfd, KvmEnableCap, KvmVmType, kvm_create_guest_memfd, + kvm_create_irqchip, kvm_memory_encrypt_op, kvm_set_identity_map_addr, kvm_set_tss_addr, }; use crate::sys::sev::{ KvmSevCmd, KvmSevCmdId, KvmSevInit, KvmSevLaunchMeasure, KvmSevLaunchStart, @@ -85,24 +84,14 @@ impl KvmVm { } } Some(Coco::AmdSnp { .. 
}) => { - let bitmap = - unsafe { kvm_check_extension(&self.vm.fd, KvmCap::EXIT_HYPERCALL) } - .context(kvm_error::CheckExtension { - ext: "KVM_CAP_EXIT_HYPERCALL", - })?; - if bitmap != 0 { - let request = KvmEnableCap { - cap: KvmCap::EXIT_HYPERCALL, - args: [bitmap as _, 0, 0, 0], - flags: 0, - pad: [0; 64], - }; - unsafe { kvm_enable_cap(&self.vm.fd, &request) }.context( - kvm_error::EnableCap { - cap: "KVM_CAP_EXIT_HYPERCALL", - }, - )?; - } + let bitmap = self.vm.check_extension(KvmCap::EXIT_HYPERCALL)?; + let request = KvmEnableCap { + cap: KvmCap::EXIT_HYPERCALL, + args: [bitmap as _, 0, 0, 0], + flags: 0, + pad: [0; 64], + }; + self.vm.enable_cap(&request)?; let mut init = KvmSevInit::default(); self.sev_op(KvmSevCmdId::INIT2, Some(&mut init))?; log::debug!("{}: snp init: {init:#x?}", self.vm); diff --git a/alioth/src/sys/linux/kvm.rs b/alioth/src/sys/linux/kvm.rs index 5f808d65..b9e7f04d 100644 --- a/alioth/src/sys/linux/kvm.rs +++ b/alioth/src/sys/linux/kvm.rs @@ -526,7 +526,6 @@ pub struct KvmEncRegion { pub size: u64, } -#[cfg(target_arch = "x86_64")] #[repr(C)] #[derive(Debug, Clone)] pub struct KvmEnableCap { @@ -688,7 +687,6 @@ ioctl_write_buf!(kvm_set_msrs, KVMIO, 0x89, KvmMsrs); #[cfg(target_arch = "x86_64")] ioctl_write_buf!(kvm_set_cpuid2, KVMIO, 0x90, KvmCpuid2); -#[cfg(target_arch = "x86_64")] ioctl_write_ptr!(kvm_enable_cap, KVMIO, 0xa3, KvmEnableCap); ioctl_write_ptr!(kvm_signal_msi, KVMIO, 0xa5, KvmMsi); From b478326a91577a64942dc0243dc33e9d688610b8 Mon Sep 17 00:00:00 2001 From: Changyuan Lyu Date: Fri, 19 Dec 2025 15:38:09 -0800 Subject: [PATCH 2/4] feat(kvm): enable KVM x2APIC API when supported Signed-off-by: Changyuan Lyu --- alioth/src/hv/kvm/vm/vm_x86_64.rs | 14 +++++++++++++- alioth/src/sys/linux/kvm.rs | 9 +++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/alioth/src/hv/kvm/vm/vm_x86_64.rs b/alioth/src/hv/kvm/vm/vm_x86_64.rs index 73db3d3c..e6f67ffa 100644 --- a/alioth/src/hv/kvm/vm/vm_x86_64.rs +++ 
b/alioth/src/hv/kvm/vm/vm_x86_64.rs @@ -21,7 +21,7 @@ use crate::hv::kvm::sev::SevFd; use crate::hv::kvm::{KvmError, KvmVm, kvm_error}; use crate::hv::{Coco, Kvm, Result, VmConfig, error}; use crate::sys::kvm::{ - KvmCap, KvmCreateGuestMemfd, KvmEnableCap, KvmVmType, kvm_create_guest_memfd, + KvmCap, KvmCreateGuestMemfd, KvmEnableCap, KvmVmType, KvmX2apicApiFlag, kvm_create_guest_memfd, kvm_create_irqchip, kvm_memory_encrypt_op, kvm_set_identity_map_addr, kvm_set_tss_addr, }; use crate::sys::sev::{ @@ -99,6 +99,18 @@ impl KvmVm { _ => {} } } + + let x2apic_caps = + KvmX2apicApiFlag::USE_32BIT_IDS | KvmX2apicApiFlag::DISABLE_BROADCAST_QUIRK; + let request = KvmEnableCap { + cap: KvmCap::X2APIC_API, + args: [x2apic_caps.bits(), 0, 0, 0], + flags: 0, + pad: [0; 64], + }; + if let Err(e) = self.vm.enable_cap(&request) { + log::error!("Failed to enable KVM_CAP_X2APIC_API: {e:?}"); + } unsafe { kvm_create_irqchip(&self.vm.fd) }.context(error::CreateDevice)?; // TODO should be in parameters unsafe { kvm_set_tss_addr(&self.vm.fd, 0xf000_0000) }.context(error::SetVmParam)?; diff --git a/alioth/src/sys/linux/kvm.rs b/alioth/src/sys/linux/kvm.rs index b9e7f04d..1f6128bc 100644 --- a/alioth/src/sys/linux/kvm.rs +++ b/alioth/src/sys/linux/kvm.rs @@ -482,12 +482,21 @@ c_enum! { IRQFD = 32; SIGNAL_MSI = 77; ARM_PSCI_0_2 = 102; + X2APIC_API = 129; EXIT_HYPERCALL = 201; // GUEST_MEMFD = 234; // VM_TYPES = 235; } } +bitflags! { + #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct KvmX2apicApiFlag: u64 { + const USE_32BIT_IDS = 1 << 0; + const DISABLE_BROADCAST_QUIRK = 1 << 1; + } +} + pub const KVM_HC_MAP_GPA_RANGE: u64 = 12; bitflags! { From 5759f4daed7e68b2bce36488ea3aa90c7b61bb16 Mon Sep 17 00:00:00 2001 From: Changyuan Lyu Date: Fri, 19 Dec 2025 15:39:54 -0800 Subject: [PATCH 3/4] feat(kvm): present MSI_EXT_DEST_ID to guest if supported KVM_FEATURE_MSI_EXT_DEST_ID allows the guest to encode bit 8-14 of x2APIC ID into bit 5-11 of MSI address. 
Theoretically, up to 32768 CPUs are supported with such a hack. Signed-off-by: Changyuan Lyu --- alioth/src/arch/x86_64/intr.rs | 34 ++++++++++++++ alioth/src/arch/x86_64/x86_64.rs | 1 + alioth/src/hv/kvm/kvm.rs | 44 +++++------------- alioth/src/hv/kvm/kvm_x86_64.rs | 63 ++++++++++++++++++++++++++ alioth/src/hv/kvm/kvm_x86_64_test.rs | 4 +- alioth/src/hv/kvm/vm/vm.rs | 14 +++--- alioth/src/hv/kvm/vm/vm_aarch64.rs | 4 ++ alioth/src/hv/kvm/vm/vm_x86_64.rs | 17 +++++++ alioth/src/hv/kvm/vm/vm_x86_64_test.rs | 28 ++++++++++++ alioth/src/sys/linux/kvm.rs | 26 +++++++++++ 10 files changed, 194 insertions(+), 41 deletions(-) create mode 100644 alioth/src/arch/x86_64/intr.rs create mode 100644 alioth/src/hv/kvm/vm/vm_x86_64_test.rs diff --git a/alioth/src/arch/x86_64/intr.rs b/alioth/src/arch/x86_64/intr.rs new file mode 100644 index 00000000..6b798072 --- /dev/null +++ b/alioth/src/arch/x86_64/intr.rs @@ -0,0 +1,34 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use bitfield::bitfield; + +bitfield! { + #[derive(Copy, Clone, Default, PartialEq, Eq, Hash)] + pub struct MsiAddrLo(u32); + impl Debug; + pub mode, set_mode : 2; + pub redirection, set_redirection : 3; + pub remappable, set_remappable : 4; + pub reserved, set_reserved : 11, 5; + pub dest_id, set_dest_id : 19, 12; + pub identifier, _: 31, 20; +} + +bitfield! 
{ + #[derive(Copy, Clone, Default, PartialEq, Eq, Hash)] + pub struct MsiAddrHi(u32); + impl Debug; + pub dest_id, set_dest_id : 31, 8; +} diff --git a/alioth/src/arch/x86_64/x86_64.rs b/alioth/src/arch/x86_64/x86_64.rs index a8e154a8..a88a72ed 100644 --- a/alioth/src/arch/x86_64/x86_64.rs +++ b/alioth/src/arch/x86_64/x86_64.rs @@ -13,6 +13,7 @@ // limitations under the License. pub mod cpuid; +pub mod intr; pub mod layout; pub mod msr; pub mod paging; diff --git a/alioth/src/hv/kvm/kvm.rs b/alioth/src/hv/kvm/kvm.rs index 97e37701..4e86f8c2 100644 --- a/alioth/src/hv/kvm/kvm.rs +++ b/alioth/src/hv/kvm/kvm.rs @@ -47,11 +47,7 @@ use crate::ffi; use crate::hv::{Hypervisor, MemMapOption, Result, VmConfig, error}; #[cfg(target_arch = "aarch64")] use crate::sys::kvm::KvmDevType; -#[cfg(target_arch = "x86_64")] -use crate::sys::kvm::kvm_get_supported_cpuid; -use crate::sys::kvm::{KVM_API_VERSION, KvmCap, kvm_get_api_version}; -#[cfg(target_arch = "x86_64")] -use crate::sys::kvm::{KVM_MAX_CPUID_ENTRIES, KvmCpuid2, KvmCpuid2Flag, KvmCpuidEntry2}; +use crate::sys::kvm::{KVM_API_VERSION, KvmCap, kvm_check_extension, kvm_get_api_version}; use self::vm::KvmVm; @@ -144,6 +140,15 @@ impl Kvm { config, }) } + + pub fn check_extension(&self, id: KvmCap) -> Result { + let ret = unsafe { kvm_check_extension(&self.fd, id) }.context(kvm_error::CheckCap)?; + if ret == 0 { + kvm_error::NotSupported { ext: id }.fail() + } else { + Ok(ret) + } + } } impl Hypervisor for Kvm { @@ -155,33 +160,6 @@ impl Hypervisor for Kvm { #[cfg(target_arch = "x86_64")] fn get_supported_cpuids(&self) -> Result> { - let mut kvm_cpuid2 = KvmCpuid2 { - nent: KVM_MAX_CPUID_ENTRIES as u32, - padding: 0, - entries: [KvmCpuidEntry2::default(); KVM_MAX_CPUID_ENTRIES], - }; - unsafe { kvm_get_supported_cpuid(&self.fd, &mut kvm_cpuid2) }.context(error::GuestCpuid)?; - let map_f = |e: &KvmCpuidEntry2| { - let in_ = CpuidIn { - func: e.function, - index: if e.flags.contains(KvmCpuid2Flag::SIGNIFCANT_INDEX) { - 
Some(e.index) - } else { - None - }, - }; - let out = CpuidResult { - eax: e.eax, - ebx: e.ebx, - ecx: e.ecx, - edx: e.edx, - }; - (in_, out) - }; - let cpuids = kvm_cpuid2.entries[0..kvm_cpuid2.nent as usize] - .iter() - .map(map_f) - .collect(); - Ok(cpuids) + Kvm::get_supported_cpuids(self) } } diff --git a/alioth/src/hv/kvm/kvm_x86_64.rs b/alioth/src/hv/kvm/kvm_x86_64.rs index 42683163..8e8f8ac1 100644 --- a/alioth/src/hv/kvm/kvm_x86_64.rs +++ b/alioth/src/hv/kvm/kvm_x86_64.rs @@ -12,6 +12,69 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::arch::x86_64::CpuidResult; +use std::collections::HashMap; + +use snafu::ResultExt; + +use crate::arch::cpuid::CpuidIn; +use crate::hv::{Kvm, Result, error}; +use crate::sys::kvm::{ + KVM_CPUID_FEATURES, KVM_MAX_CPUID_ENTRIES, KvmCap, KvmCpuid2, KvmCpuid2Flag, KvmCpuidEntry2, + KvmCpuidFeature, KvmX2apicApiFlag, kvm_get_supported_cpuid, +}; + +impl Kvm { + pub fn get_supported_cpuids(&self) -> Result> { + let mut kvm_cpuid2 = KvmCpuid2 { + nent: KVM_MAX_CPUID_ENTRIES as u32, + padding: 0, + entries: [KvmCpuidEntry2::default(); KVM_MAX_CPUID_ENTRIES], + }; + unsafe { kvm_get_supported_cpuid(&self.fd, &mut kvm_cpuid2) }.context(error::GuestCpuid)?; + let map_f = |e: &KvmCpuidEntry2| { + let in_ = CpuidIn { + func: e.function, + index: if e.flags.contains(KvmCpuid2Flag::SIGNIFCANT_INDEX) { + Some(e.index) + } else { + None + }, + }; + let out = CpuidResult { + eax: e.eax, + ebx: e.ebx, + ecx: e.ecx, + edx: e.edx, + }; + (in_, out) + }; + let mut cpuids: HashMap<_, _> = kvm_cpuid2 + .entries + .iter() + .take(kvm_cpuid2.nent as usize) + .map(map_f) + .collect(); + + // Enable KVM_FEATURE_MSI_EXT_DEST_ID if KVM_CAP_X2APIC_API is supported + let ext = self.check_extension(KvmCap::X2APIC_API)?; + let flag = KvmX2apicApiFlag::from_bits_retain(ext as u64); + let x2apic_flags = + KvmX2apicApiFlag::USE_32BIT_IDS | KvmX2apicApiFlag::DISABLE_BROADCAST_QUIRK; + let 
leaf_features = CpuidIn { + func: KVM_CPUID_FEATURES, + index: None, + }; + if let Some(entry) = cpuids.get_mut(&leaf_features) + && flag.contains(x2apic_flags) + { + entry.eax |= KvmCpuidFeature::MSI_EXT_DEST_ID.bits(); + } + + Ok(cpuids) + } +} + #[cfg(test)] #[path = "kvm_x86_64_test.rs"] mod tests; diff --git a/alioth/src/hv/kvm/kvm_x86_64_test.rs b/alioth/src/hv/kvm/kvm_x86_64_test.rs index 5917bd12..faceeaf8 100644 --- a/alioth/src/hv/kvm/kvm_x86_64_test.rs +++ b/alioth/src/hv/kvm/kvm_x86_64_test.rs @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::hv::Hypervisor; use crate::hv::kvm::{Kvm, KvmConfig}; +use crate::sys::kvm::KVM_CPUID_SIGNATURE; #[test] #[cfg_attr(not(feature = "test-hv"), ignore)] @@ -22,7 +22,7 @@ fn test_get_supported_cpuid() { let mut kvm_cpuid_exist = false; let supported_cpuids = kvm.get_supported_cpuids().unwrap(); for (in_, out) in &supported_cpuids { - if in_.func == 0x4000_0000 + if in_.func == KVM_CPUID_SIGNATURE && out.ebx.to_le_bytes() == *b"KVMK" && out.ecx.to_le_bytes() == *b"VMKV" && out.edx.to_le_bytes() == *b"M\0\0\0" diff --git a/alioth/src/hv/kvm/vm/vm.rs b/alioth/src/hv/kvm/vm/vm.rs index 3a019533..642fdf99 100644 --- a/alioth/src/hv/kvm/vm/vm.rs +++ b/alioth/src/hv/kvm/vm/vm.rs @@ -57,9 +57,9 @@ use crate::sys::kvm::{ }; #[cfg(target_arch = "aarch64")] -use self::aarch64::VmArch; +use self::aarch64::{VmArch, translate_msi_addr}; #[cfg(target_arch = "x86_64")] -use self::x86_64::VmArch; +use self::x86_64::{VmArch, translate_msi_addr}; #[derive(Debug)] pub struct VmInner { @@ -102,9 +102,10 @@ impl VmInner { { entries[index].flags = KvmMsiFlag::VALID_DEVID; } + let (lo, hi) = translate_msi_addr(entry.addr_lo, entry.addr_hi); entries[index].routing.msi = KvmIrqRoutingMsi { - address_hi: entry.addr_hi, - address_lo: entry.addr_lo, + address_hi: hi, + address_lo: lo, data: entry.data, #[cfg(target_arch = "aarch64")] devid: entry.devid, @@ 
-474,9 +475,10 @@ impl MsiSender for KvmMsiSender { type IrqFd = KvmIrqFd; fn send(&self, addr: u64, data: u32) -> Result<()> { + let (lo, hi) = translate_msi_addr(addr as u32, (addr >> 32) as u32); let kvm_msi = KvmMsi { - address_lo: addr as u32, - address_hi: (addr >> 32) as u32, + address_lo: lo, + address_hi: hi, data, #[cfg(target_arch = "aarch64")] devid: self.devid, diff --git a/alioth/src/hv/kvm/vm/vm_aarch64.rs b/alioth/src/hv/kvm/vm/vm_aarch64.rs index 819b78a4..41970268 100644 --- a/alioth/src/hv/kvm/vm/vm_aarch64.rs +++ b/alioth/src/hv/kvm/vm/vm_aarch64.rs @@ -23,6 +23,10 @@ use crate::sys::kvm::{ KvmVmType, }; +pub fn translate_msi_addr(addr_lo: u32, addr_hi: u32) -> (u32, u32) { + (addr_lo, addr_hi) +} + #[derive(Debug)] pub struct KvmGicV2m; diff --git a/alioth/src/hv/kvm/vm/vm_x86_64.rs b/alioth/src/hv/kvm/vm/vm_x86_64.rs index e6f67ffa..27086caf 100644 --- a/alioth/src/hv/kvm/vm/vm_x86_64.rs +++ b/alioth/src/hv/kvm/vm/vm_x86_64.rs @@ -16,6 +16,7 @@ use std::os::fd::{AsFd, AsRawFd, FromRawFd, OwnedFd}; use snafu::ResultExt; +use crate::arch::intr::{MsiAddrHi, MsiAddrLo}; use crate::arch::sev::{SevPolicy, SevStatus, SnpPageType, SnpPolicy}; use crate::hv::kvm::sev::SevFd; use crate::hv::kvm::{KvmError, KvmVm, kvm_error}; @@ -29,6 +30,18 @@ use crate::sys::sev::{ KvmSevLaunchUpdateData, KvmSevSnpLaunchFinish, KvmSevSnpLaunchStart, KvmSevSnpLaunchUpdate, }; +pub fn translate_msi_addr(addr_lo: u32, addr_hi: u32) -> (u32, u32) { + let mut addr_lo = MsiAddrLo(addr_lo); + if addr_lo.reserved() == 0 || addr_lo.remappable() || addr_hi != 0 { + return (addr_lo.0, addr_hi); + } + + let mut addr_hi = MsiAddrHi(0); + addr_hi.set_dest_id(addr_lo.reserved()); + addr_lo.set_reserved(0); + (addr_lo.0, addr_hi.0) +} + #[derive(Debug)] pub struct VmArch { pub sev_fd: Option, @@ -209,3 +222,7 @@ impl KvmVm { Ok(()) } } + +#[cfg(test)] +#[path = "vm_x86_64_test.rs"] +mod test; diff --git a/alioth/src/hv/kvm/vm/vm_x86_64_test.rs b/alioth/src/hv/kvm/vm/vm_x86_64_test.rs 
new file mode 100644 index 00000000..f0a09afa --- /dev/null +++ b/alioth/src/hv/kvm/vm/vm_x86_64_test.rs @@ -0,0 +1,28 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use rstest::rstest; + +use crate::hv::kvm::vm::x86_64::translate_msi_addr; + +#[rstest] +#[case(0, 0)] +#[case(0xfee0_0010, 0xfee0_0010)] +#[case(0xfee0_1000, 0xfee0_1000)] +#[case(0x100_fee0_1000, 0x100_fee0_1000)] +#[case(0xfee0_1020, 0x100_fee0_1000)] +fn test_translate_msi_addr(#[case] addr: u64, #[case] expected: u64) { + let (lo, hi) = translate_msi_addr(addr as u32, (addr >> 32) as u32); + assert_eq!((lo as u64) | ((hi as u64) << 32), expected); +} diff --git a/alioth/src/sys/linux/kvm.rs b/alioth/src/sys/linux/kvm.rs index 1f6128bc..b579b382 100644 --- a/alioth/src/sys/linux/kvm.rs +++ b/alioth/src/sys/linux/kvm.rs @@ -79,6 +79,32 @@ pub struct KvmCpuid2 { pub entries: [KvmCpuidEntry2; N], } +pub const KVM_CPUID_SIGNATURE: u32 = 0x4000_0000; +pub const KVM_CPUID_FEATURES: u32 = 0x4000_0001; + +bitflags! 
{ + #[derive(Debug, Clone, Copy, Default)] + pub struct KvmCpuidFeature: u32 { + const CLOCKSOURCE = 1 << 0; + const NOP_IO_DELAY = 1 << 1; + const MMU_OP = 1 << 2; + const CLOCKSOURCE2 = 1 << 3; + const ASYNC_PF = 1 << 4; + const STEAL_TIME = 1 << 5; + const PV_EOI = 1 << 6; + const PV_UNHALT = 1 << 7; + const PV_TLB_FLUSH = 1 << 9; + const ASYNC_PF_VMEXIT = 1 << 10; + const PV_SEND_IPI = 1 << 11; + const POLL_CONTROL = 1 << 12; + const PV_SCHED_YIELD = 1 << 13; + const ASYNC_PF_INT = 1 << 14; + const MSI_EXT_DEST_ID = 1 << 15; + const HC_MAP_GPA_RANGE = 1 << 16; + const MIGRATION_CONTROL = 1 << 17; + } +} + #[cfg(target_arch = "x86_64")] #[repr(C)] #[derive(Debug, Copy, Clone, Default)] From e08baecc918522abd50a29599f9b1bf9b715828f Mon Sep 17 00:00:00 2001 From: Changyuan Lyu Date: Fri, 19 Dec 2025 15:41:49 -0800 Subject: [PATCH 4/4] feat(loader): boot guest with x2APIC enabled When booted in x2APIC mode, Linux is able to bring up CPUs with x2APIC ID > 254. Signed-off-by: Changyuan Lyu --- alioth/src/arch/x86_64/msr.rs | 11 +++++++++++ alioth/src/loader/linux/linux_x86_64.rs | 12 +++++++++--- alioth/src/loader/xen/xen.rs | 10 +++++++++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/alioth/src/arch/x86_64/msr.rs b/alioth/src/arch/x86_64/msr.rs index 29d5df4f..803fe49e 100644 --- a/alioth/src/arch/x86_64/msr.rs +++ b/alioth/src/arch/x86_64/msr.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use bitfield::bitfield; use bitflags::bitflags; // Intel Vol.4, Table 2-2. @@ -47,3 +48,13 @@ bitflags! { const FAST_STRINGS = 1 << 0; } } + +bitfield! 
{ + #[derive(Copy, Clone, Default, PartialEq, Eq, Hash)] + pub struct ApicBase(u64); + impl Debug; + pub bsp, set_bsp : 8; + pub x2apic, set_x2apic : 10; + pub xapic, set_xapic : 11; + pub base, set_base : 35, 12; +} diff --git a/alioth/src/loader/linux/linux_x86_64.rs b/alioth/src/loader/linux/linux_x86_64.rs index cd1dabe5..ae028713 100644 --- a/alioth/src/loader/linux/linux_x86_64.rs +++ b/alioth/src/loader/linux/linux_x86_64.rs @@ -22,10 +22,10 @@ use snafu::ResultExt; use zerocopy::{FromZeros, IntoBytes}; use crate::arch::layout::{ - BOOT_GDT_START, BOOT_PAGING_START, EBDA_START, KERNEL_CMDLINE_LIMIT, KERNEL_CMDLINE_START, - KERNEL_IMAGE_START, LINUX_BOOT_PARAMS_START, + APIC_START, BOOT_GDT_START, BOOT_PAGING_START, EBDA_START, KERNEL_CMDLINE_LIMIT, + KERNEL_CMDLINE_START, KERNEL_IMAGE_START, LINUX_BOOT_PARAMS_START, }; -use crate::arch::msr::Efer; +use crate::arch::msr::{ApicBase, Efer}; use crate::arch::paging::Entry; use crate::arch::reg::{ Cr0, Cr4, DtReg, DtRegVal, Reg, Rflags, SReg, SegAccess, SegReg, SegRegVal, @@ -239,6 +239,11 @@ pub fn load>( let idtr = DtRegVal { base: 0, limit: 0 }; memory.write_t(BOOT_GDT_START, &gdt)?; + let mut apic_base = ApicBase(APIC_START); + apic_base.set_bsp(true); + apic_base.set_xapic(true); + apic_base.set_x2apic(true); + Ok(InitState { regs: vec![ (Reg::Rsi, LINUX_BOOT_PARAMS_START), @@ -250,6 +255,7 @@ pub fn load>( (SReg::Cr0, (Cr0::NE | Cr0::PE | Cr0::PG).bits() as u64), (SReg::Cr3, pml4_start), (SReg::Cr4, Cr4::PAE.bits() as u64), + (SReg::ApicBase, apic_base.0), ], seg_regs: vec![ (SegReg::Cs, boot_cs), diff --git a/alioth/src/loader/xen/xen.rs b/alioth/src/loader/xen/xen.rs index 0dc0f9df..e0ff14f5 100644 --- a/alioth/src/loader/xen/xen.rs +++ b/alioth/src/loader/xen/xen.rs @@ -25,8 +25,10 @@ use zerocopy::{FromZeros, Immutable, IntoBytes}; use crate::align_up; use crate::arch::layout::{ - BOOT_GDT_START, EBDA_START, HVM_START_INFO_START, KERNEL_CMDLINE_LIMIT, KERNEL_CMDLINE_START, + APIC_START, BOOT_GDT_START, 
EBDA_START, HVM_START_INFO_START, KERNEL_CMDLINE_LIMIT, + KERNEL_CMDLINE_START, }; +use crate::arch::msr::ApicBase; use crate::arch::reg::{Cr0, DtReg, DtRegVal, Reg, Rflags, SReg, SegAccess, SegReg, SegRegVal}; use crate::loader::elf::{ ELF_HEADER_MAGIC, ELF_IDENT_CLASS_64, ELF_IDENT_LITTLE_ENDIAN, Elf64Header, Elf64Note, @@ -304,6 +306,11 @@ pub fn load>( let idtr = DtRegVal { base: 0, limit: 0 }; + let mut apic_base = ApicBase(APIC_START); + apic_base.set_bsp(true); + apic_base.set_xapic(true); + apic_base.set_x2apic(true); + Ok(InitState { regs: vec![ (Reg::Rbx, HVM_START_INFO_START), @@ -314,6 +321,7 @@ pub fn load>( (SReg::Cr0, Cr0::PE.bits() as u64), (SReg::Cr4, 0), (SReg::Efer, 0), + (SReg::ApicBase, apic_base.0), ], seg_regs: vec![ (SegReg::Cs, boot_cs),