Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
75 commits
Select commit Hold shift + click to select a range
891d849
arch: Initial data structures for describing CPUID outputs
olivereanderson Oct 7, 2025
664200c
hypervisor: Implement common traits for HypervisorType and CpuVendor
olivereanderson Oct 7, 2025
62bc2dd
arch: CpuProfile data structures
olivereanderson Oct 7, 2025
0fd99c2
misc: Make CPU profile part of various configs
olivereanderson Oct 7, 2025
4e0664b
arch: Apply CPU profile (if any) when generating common CPUID
olivereanderson Oct 7, 2025
222862a
arch: Profile generation logic
olivereanderson Oct 7, 2025
3816b3a
arch: CPUID definitions: intel & kvm
olivereanderson Oct 7, 2025
36a0311
hypervisor: Derive Eq for HypervisorType and CpuVendor
olivereanderson Oct 7, 2025
3dc3f8b
cpu-profile-generation: CPU profile generation CLI
olivereanderson Oct 7, 2025
851d19c
arch: Temporarily add a CPU profile for local testing
olivereanderson Oct 7, 2025
aca51f7
arch: Log error if profile deserialization fails
olivereanderson Oct 7, 2025
2bfc8b8
arch: Check for CPU Vendor compatibility with CPU profile
olivereanderson Oct 7, 2025
9b3e7f3
Add skylake profile
olivereanderson Oct 29, 2025
57f43fd
sapphire rapids profile
olivereanderson Nov 4, 2025
766c94f
Change profile policy for waitpkg
olivereanderson Nov 5, 2025
334827d
Update sapphire rapids profile
olivereanderson Nov 5, 2025
a7ea76c
Update skylake profile
olivereanderson Nov 5, 2025
92c1715
Inherit instead of passthrough
olivereanderson Nov 12, 2025
b9a177f
Update policy for local apic
olivereanderson Nov 12, 2025
e4e02b7
Leaf 0x1 updates
olivereanderson Nov 12, 2025
2384066
leaf 0x5
olivereanderson Nov 12, 2025
dc64e0d
leaf 0x7 sub-leaf 0
olivereanderson Nov 12, 2025
b67e6a1
Leaf 0x7
olivereanderson Nov 12, 2025
68d8b7f
More fixes
olivereanderson Nov 12, 2025
bd5ccc7
Update sapphire rapids profile
olivereanderson Nov 12, 2025
4ccfea0
Update Skylake profile
olivereanderson Nov 12, 2025
61bdbe9
Adapt profiles to work with QEMU + comment out compatibility target c…
olivereanderson Nov 12, 2025
9e6cd6d
Remove compatibility target. Still need to regenerate profiles
olivereanderson Nov 14, 2025
bf43d02
Update Skylake profile
olivereanderson Nov 14, 2025
8c1e3f9
Update Sapphire rapids profile
olivereanderson Nov 14, 2025
356009a
x
olivereanderson Nov 17, 2025
7afd3e7
Log more information about the bits that fail the cpuid compatibility…
olivereanderson Nov 17, 2025
5427ca1
Remove dev profile
olivereanderson Nov 17, 2025
c4f77c2
Quick fix: Skip checks for MPX state components
olivereanderson Nov 18, 2025
4c78bcb
Quick fix: Overwrite policies for x2APIC and APIC on chip
olivereanderson Nov 18, 2025
f5a357a
Update Skylake profile
olivereanderson Nov 18, 2025
fc4636f
Update Sapphire Rapids profile
olivereanderson Nov 18, 2025
58209ad
Logging for debug purposes
olivereanderson Nov 21, 2025
b16aae5
Zero out TSX
olivereanderson Nov 25, 2025
af64035
Regenerate profiles
olivereanderson Nov 25, 2025
11919e0
Hex encoded serialization
olivereanderson Nov 27, 2025
dd85261
Pretty print CPU profile JSON
olivereanderson Nov 27, 2025
d16e3c9
Regenerate CPU profiles
olivereanderson Nov 27, 2025
f2b1683
Check for AMX CPUID (incomplete first attempt probably wrong approach)
olivereanderson Nov 28, 2025
824db90
Enable TILE state components when AMX is available during profile gen…
olivereanderson Nov 28, 2025
3d38811
fixed logic bugs
olivereanderson Nov 28, 2025
65cfd70
Update Sapphire Rapids profile
olivereanderson Nov 28, 2025
a5bfea9
hypervisor: Make amx_supported public
olivereanderson Dec 3, 2025
a769bac
Use XsaveState::enable_amx_state_components during profile generation
olivereanderson Dec 4, 2025
b787059
hypervisor: Introduce an enable AMX tile state components method
olivereanderson Dec 4, 2025
82f651b
vmm: Refactor amx tile state component enabling logic
olivereanderson Dec 4, 2025
d2b9691
arch: Enable amx when available during profile generation
olivereanderson Dec 4, 2025
8ad4a1b
Use hypervisor method
olivereanderson Dec 4, 2025
4733b20
add check on vmm
olivereanderson Dec 4, 2025
a369b5e
x
olivereanderson Dec 4, 2025
e3e0d5f
x
olivereanderson Dec 4, 2025
7a1e628
Add more permitted missing cpuid entries
olivereanderson Dec 5, 2025
81954a6
Modify profile data to reflect AMX requirements
olivereanderson Dec 5, 2025
c197ef8
Temporary fix: Set XSAVE size in hypervisor::enable_amx_state_components
olivereanderson Dec 5, 2025
f6ff87e
Fix bug
olivereanderson Dec 5, 2025
46ec701
Change processor brand string
olivereanderson Dec 8, 2025
0f9d1e1
Remove MPX properly
olivereanderson Dec 8, 2025
ddb6153
Change brand string policy
olivereanderson Dec 8, 2025
bc0605f
Use clap
olivereanderson Dec 8, 2025
74aedeb
Fix
olivereanderson Dec 8, 2025
27edb67
x
olivereanderson Dec 8, 2025
cf1ae8a
x
olivereanderson Dec 8, 2025
c0c0fb6
x
olivereanderson Dec 8, 2025
c3082d9
Regenerate Sapphire rapids
olivereanderson Dec 8, 2025
c639cad
Rename Overwrite to Static
olivereanderson Dec 9, 2025
9be2490
Fix typos
olivereanderson Dec 9, 2025
91bf088
As slice method instead of exposing internals
olivereanderson Dec 9, 2025
bf5c438
Remove migration compatibility requirement
olivereanderson Dec 9, 2025
3d01ba8
Explain why we don't use a RangeInclusive
olivereanderson Dec 9, 2025
1dbf0f7
Update skylake profile
olivereanderson Dec 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions arch/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,32 @@ edition.workspace = true
name = "arch"
version = "0.1.0"

# TODO: Consider making this a binary of the main package instead
[[bin]]
name = "generate-cpu-profile"
path = "src/bin/generate-cpu-profile.rs"
required-features = ["cpu_profile_generation"]

[features]
default = []
fw_cfg = []
kvm = ["hypervisor/kvm"]
sev_snp = []
tdx = []
# Currently cpu profiles can only be generated with KVM
cpu_profile_generation = ["kvm", "dep:clap"]

[dependencies]
anyhow = { workspace = true }
byteorder = { workspace = true }
clap = { workspace = true, optional = true }
hypervisor = { path = "../hypervisor" }
libc = { workspace = true }
linux-loader = { workspace = true, features = ["bzimage", "elf", "pe"] }
log = { workspace = true }
serde = { workspace = true, features = ["derive", "rc"] }
# We currently use this for (de-)serializing CPU profile data
serde_json = { workspace = true }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

optional = true

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should always be required for x86_64 as it is also used when loading CPU profile data (not only generation). We can make it optional for other architectures where there is no concept of CPU profiles (yet) though.

The alternative would be for the CLI to generate .rs files instead which might be something to consider as a follow up task, but I think serde_json is good enough for the PoC.

thiserror = { workspace = true }
uuid = { workspace = true }
vm-memory = { workspace = true, features = ["backend-bitmap", "backend-mmap"] }
Expand Down
33 changes: 33 additions & 0 deletions arch/src/bin/generate-cpu-profile.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#![cfg(all(
target_arch = "x86_64",
feature = "cpu_profile_generation",
feature = "kvm"
))]
use anyhow::Context;
use clap::{Arg, Command};
use std::io::BufWriter;

/// Command line entry point of the CPU profile generator.
///
/// Expects exactly one required argument, `name`, which is the name to give the CPU profile. The
/// profile data is handed to a buffered writer over the locked standard output.
///
/// # Errors
///
/// Fails if no hypervisor can be obtained, or if profile generation itself reports an error.
fn main() -> anyhow::Result<()> {
    let name_arg = Arg::new("name")
        .help("The name to give the CPU profile")
        .num_args(1)
        .required(true);
    let matches = Command::new("generate-cpu-profile")
        .version(env!("CARGO_PKG_VERSION"))
        .arg_required_else_help(true)
        .arg(name_arg)
        .get_matches();

    // `name` is declared as required above, hence clap never hands us matches without it.
    let profile_name = matches.get_one::<String>("name").unwrap();

    let hypervisor = hypervisor::new().context("Could not obtain hypervisor")?;

    // TODO: Consider letting the user provide a file path as a target instead of writing to stdout.
    // The way it is now should be sufficient for a PoC however.
    let stdout_writer = BufWriter::new(std::io::stdout().lock());
    arch::x86_64::cpu_profile_generation::generate_profile_data(
        stdout_writer,
        hypervisor.as_ref(),
        profile_name,
    )
}
30 changes: 30 additions & 0 deletions arch/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,17 @@
extern crate log;

use std::collections::BTreeMap;
use std::str::FromStr;
use std::sync::Arc;
use std::{fmt, result};

use serde::de::IntoDeserializer;
use serde::{Deserialize, Serialize};
use thiserror::Error;

#[cfg(target_arch = "x86_64")]
pub use crate::x86_64::cpu_profile::CpuProfile;

type GuestMemoryMmap = vm_memory::GuestMemoryMmap<vm_memory::bitmap::AtomicBitmap>;
type GuestRegionMmap = vm_memory::GuestRegionMmap<vm_memory::bitmap::AtomicBitmap>;

Expand Down Expand Up @@ -56,6 +61,31 @@ pub enum Error {
/// Type for returning public functions outcome.
pub type Result<T> = result::Result<T, Error>;

// If the target_arch is x86_64 we import CpuProfile from the x86_64 module, otherwise we
// declare it here.
#[cfg(not(target_arch = "x86_64"))]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "kebab-case")]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why kebab-case? it's fine, just asking

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I recall correctly that is what QEMU uses for its CPU models. I thought it would be a good idea to stay consistent.

/// A [`CpuProfile`] is a mechanism for ensuring live migration compatibility
/// between hosts with potentially different CPU models.
///
/// This is the fallback declaration used when the target architecture is not
/// x86_64; it only offers the `Host` variant.
pub enum CpuProfile {
    /// The default (and, in this fallback declaration, only) profile.
    #[default]
    Host,
}

impl FromStr for CpuProfile {
    type Err = serde::de::value::Error;

    /// Parses a [`CpuProfile`] from its serialized name.
    ///
    /// Both the plain name (e.g. `host`) and the same name surrounded by a pair of `"`
    /// (e.g. `"host"`) are accepted.
    ///
    /// # Errors
    ///
    /// Returns the deserialization error if the (unquoted) string does not name a known profile.
    fn from_str(s: &str) -> result::Result<Self, Self::Err> {
        // Only strip the quotes when BOTH the leading and the trailing one are present. Stripping
        // them independently would silently accept half-quoted input such as `host"`.
        let normalized = s
            .strip_prefix('"')
            .and_then(|without_prefix| without_prefix.strip_suffix('"'))
            .unwrap_or(s);
        Self::deserialize(normalized.into_deserializer())
    }
}

/// Type for memory region types.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize, Deserialize)]
pub enum RegionType {
Expand Down
239 changes: 239 additions & 0 deletions arch/src/x86_64/cpu_profile.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
use hypervisor::arch::x86::CpuIdEntry;
use hypervisor::{CpuVendor, HypervisorType};
use serde::{Deserialize, Serialize};

use crate::x86_64::CpuidReg;
use crate::x86_64::cpuid_definitions::{Parameters, deserialize_from_hex, serialize_as_hex};

#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "kebab-case")]
#[allow(non_camel_case_types)]
/// A [`CpuProfile`] is a mechanism for ensuring live migration compatibility
/// between hosts with potentially different CPU models.
///
/// Variant names are (de-)serialized in kebab-case (for instance `sapphire-rapids`).
pub enum CpuProfile {
    /// The default profile. No pre-generated profile data is associated with it.
    #[default]
    Host,
    /// Profile whose data is embedded from `cpu_profiles/skylake.json`.
    Skylake,
    /// Profile whose data is embedded from `cpu_profiles/sapphire-rapids.json`.
    SapphireRapids,
}

impl CpuProfile {
    /// Loads the pre-generated [`CpuProfileData`] embedded for this profile.
    ///
    /// Returns `None` for [`CpuProfile::Host`]. If `amx` is `false` the replacement values that
    /// refer to the AMX tile state components (bits 17 and 18 of leaf 0xd sub-leaf 0 EAX and
    /// sub-leaf 1 ECX, as well as leaf 0xd sub-leaves 17 and 18) are wiped from the loaded data.
    ///
    /// # Panics
    ///
    /// Panics if the embedded JSON cannot be deserialized.
    // We can only generate CPU profiles for the KVM hypervisor for the time being.
    #[cfg(feature = "kvm")]
    pub(in crate::x86_64) fn data(&self, amx: bool) -> Option<CpuProfileData> {
        // Bits 17 and 18, i.e. the positions of the two tile state components.
        const TILE_STATE_BITS: u32 = (1 << 17) | (1 << 18);

        let raw_profile: &[u8] = match self {
            Self::Host => return None,
            Self::Skylake => &include_bytes!("cpu_profiles/skylake.json")[..],
            Self::SapphireRapids => &include_bytes!("cpu_profiles/sapphire-rapids.json")[..],
        };
        let mut data: CpuProfileData = serde_json::from_slice(raw_profile)
            .inspect_err(|e| error!("BUG: could not deserialize CPU profile. Got error: {:?}", e))
            .expect("should be able to deserialize pre-generated data");

        if amx {
            return Some(data);
        }

        // Without AMX we need to wipe out the AMX tile state components (if they are included
        // in the profile).
        for (param, adjustment) in data.adjustments.iter_mut() {
            let sub_leaf = *param.sub_leaf.start();
            // Only leaf 0xd adjustments that target exactly one sub-leaf are affected.
            if param.leaf != 0xd || sub_leaf != *param.sub_leaf.end() {
                continue;
            }

            if sub_leaf == 17 || sub_leaf == 18 {
                adjustment.replacements = 0;
            } else if (sub_leaf == 0 && param.register == CpuidReg::EAX)
                || (sub_leaf == 1 && param.register == CpuidReg::ECX)
            {
                adjustment.replacements &= !TILE_STATE_BITS;
            }
        }

        Some(data)
    }

    /// Placeholder used when the crate is built without the `kvm` feature.
    ///
    /// # Panics
    ///
    /// Always panics, as profile data is not available without KVM.
    #[cfg(not(feature = "kvm"))]
    pub(in crate::x86_64) fn data(&self) -> Option<CpuProfileData> {
        unimplemented!()
    }
}

/// Every [`CpuProfile`] different from `Host` has associated [`CpuProfileData`].
///
/// New constructors of this struct may only be generated through the CHV CLI (when built from source with
/// the `cpu_profile_generation` feature) which other hosts may then attempt to load in order to
/// increase the likelihood of successful live migrations among all hosts that opted in to the given
/// CPU profile.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[allow(dead_code)]
pub struct CpuProfileData {
/// The hypervisor used when generating this CPU profile.
pub(in crate::x86_64) hypervisor: HypervisorType,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TIL this syntax

/// The vendor of the CPU belonging to the host that generated this CPU profile.
pub(in crate::x86_64) cpu_vendor: CpuVendor,
/// Adjustments necessary to become compatible with the desired target.
pub(in crate::x86_64) adjustments: Vec<(Parameters, CpuidOutputRegisterAdjustments)>,
}

/* TODO: The [`CpuProfile`] struct will likely need a few more iterations. The following
sections should explain why:

# MSR restrictions

CPU profiles also need to restrict which MSRs may be manipulated by the guest as various physical CPUs
can have differing supported MSRs.

The CPU profile will thus necessarily need to contain some data related to MSR restrictions. That will
be taken care of in a follow up MR.

# Raw hardware CPUID for advanced opt-in features

Some more advanced CPU Features may either not be present when prompting the hypervisor for supported CPUID
entries (especially if this is done with the hypervisor in its default configuration), or may otherwise be
declared to be overwritten by all CPU profiles (as the safest default).

We may still want to let users opt-in to using such features if permitted by the hardware and hypervisor
however. Hence we may also want the `CpuProfile` to contain all CPUID entries obtained directly from the
hardware of the host the profile was built from.

This hardware information can then later be used on other hosts running under this pre-generated CPU
profile whenever the user wants to opt-in to more advanced CPU features. If we can determine that the
feature is satisfied by the hypervisor, the hardware of the host generating the profile, and the
current host then this should preserve live migration compatibility (unless the feature is inherently
incompatible with live migration of course).
*/

/// Used for adjusting an entire cpuid output register (EAX, EBX, ECX or EDX)
///
/// Applying an adjustment keeps only the register bits selected by `mask` and then ORs
/// `replacements` into the result.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub(super) struct CpuidOutputRegisterAdjustments {
    /// The value that is OR-ed into the register once it has been masked. (De-)serialized through
    /// the hex helpers.
    #[serde(serialize_with = "serialize_as_hex")]
    #[serde(deserialize_with = "deserialize_from_hex")]
    pub(in crate::x86_64) replacements: u32,
    /// Used to zero out the area `replacements` occupy. This mask is not necessarily !replacements, as replacements may pack values of different types (i.e. it is wrong to think of it as a bitset conceptually speaking).
    #[serde(serialize_with = "serialize_as_hex")]
    #[serde(deserialize_with = "deserialize_from_hex")]
    pub(in crate::x86_64) mask: u32,
}
impl CpuidOutputRegisterAdjustments {
    /// Applies this adjustment to a single CPUID output register.
    ///
    /// The register keeps only the bits selected by `mask`, after which `replacements` is OR-ed in.
    pub(in crate::x86_64) fn adjust(self, cpuid_output_register: &mut u32) {
        *cpuid_output_register = (*cpuid_output_register & self.mask) | self.replacements;
    }

    /// Applies `adjustments` to every output register of every entry in `cpuid`.
    ///
    /// A register for which no adjustment with matching leaf, sub-leaf and register is found gets
    /// zeroed out entirely.
    ///
    /// # Errors
    ///
    /// Returns [`MissingCpuidEntriesError`] if an adjustment with non-zero replacements has no
    /// corresponding entry in `cpuid`, unless it targets a leaf 0xd sub-leaf whose bit is cleared
    /// in the adjusted leaf 0xd sub-leaf 0 and sub-leaf 1 registers.
    pub(in crate::x86_64) fn adjust_cpuid_entries(
        mut cpuid: Vec<CpuIdEntry>,
        adjustments: &[(Parameters, Self)],
    ) -> Result<Vec<CpuIdEntry>, MissingCpuidEntriesError> {
        // The trivial adjustment: leads to the register being zeroed out entirely.
        let zero_out = CpuidOutputRegisterAdjustments {
            replacements: 0,
            mask: 0,
        };

        for entry in cpuid.iter_mut() {
            let (function, index) = (entry.function, entry.index);
            let registers = [
                (CpuidReg::EAX, &mut entry.eax),
                (CpuidReg::EBX, &mut entry.ebx),
                (CpuidReg::ECX, &mut entry.ecx),
                (CpuidReg::EDX, &mut entry.edx),
            ];
            for (reg, reg_value) in registers {
                // Pick the first adjustment matching the entry's function/leaf, index/sub-leaf
                // and the current register; fall back to the trivial adjustment otherwise.
                let adjustment = adjustments
                    .iter()
                    .find(|(param, _)| {
                        param.leaf == function
                            && param.sub_leaf.contains(&index)
                            && param.register == reg
                    })
                    .map_or(zero_out, |(_, found)| *found);
                adjustment.adjust(reg_value);
            }
        }

        // Check that we found every value that was supposed to be replaced with something else
        // than 0. The (already adjusted) leaf 0xd sub-leaf 0 and 1 registers tell us which of the
        // higher leaf 0xd sub-leaves may legitimately be absent.
        let leaf_0xd_sub_0 = cpuid
            .iter()
            .find(|entry| entry.function == 0xd && entry.index == 0);
        let leaf_0xd_sub_1 = cpuid
            .iter()
            .find(|entry| entry.function == 0xd && entry.index == 1);
        let eax_0xd_0 = leaf_0xd_sub_0.map_or(0, |entry| entry.eax);
        let edx_0xd_0 = leaf_0xd_sub_0.map_or(0, |entry| entry.edx);
        let ecx_0xd_1 = leaf_0xd_sub_1.map_or(0, |entry| entry.ecx);
        let edx_0xd_1 = leaf_0xd_sub_1.map_or(0, |entry| entry.edx);

        let set_in_neither =
            |bit: u32, first: u32, second: u32| (bit & first) == 0 && (bit & second) == 0;

        let mut missing_entry = false;
        for (param, adjustment) in adjustments {
            if adjustment.replacements == 0 {
                continue;
            }
            let sub_start = *param.sub_leaf.start();
            let sub_end = *param.sub_leaf.end();

            // If the bit belonging to the sub-leaf is cleared in both registers then the sub-leaf
            // is to be considered invalid anyway and it is OK if we don't find it.
            let low_sub_leaf_invalid = sub_start >= 2
                && sub_start < 32
                && sub_start == sub_end
                && set_in_neither(1 << sub_start, eax_0xd_0, ecx_0xd_1);
            let high_sub_leaf_invalid = sub_start >= 32
                && sub_start < 64
                && set_in_neither(1 << (sub_start - 32), edx_0xd_0, edx_0xd_1);
            if param.leaf == 0xd && (low_sub_leaf_invalid || high_sub_leaf_invalid) {
                continue;
            }

            let entry_present = cpuid.iter().any(|entry| {
                entry.function == param.leaf && param.sub_leaf.contains(&entry.index)
            });
            if !entry_present {
                error!(
                    "cannot adjust CPU profile. No entry found matching the required parameters: {:?}",
                    param
                );
                missing_entry = true;
            }
        }

        if missing_entry {
            Err(MissingCpuidEntriesError)
        } else {
            Ok(cpuid)
        }
    }
}

/// Error signalling that CPUID entries required by a set of adjustments were not found.
#[derive(Debug)]
pub(in crate::x86_64) struct MissingCpuidEntriesError;

impl std::fmt::Display for MissingCpuidEntriesError {
    /// Writes the fixed, human readable description of this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Required CPUID entries not found")
    }
}

impl std::error::Error for MissingCpuidEntriesError {}
Loading
Loading