-
Notifications
You must be signed in to change notification settings - Fork 2
CPU Profiles (Part 1: CPUID) #25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
891d849
664200c
62bc2dd
0fd99c2
4e0664b
222862a
3816b3a
36a0311
3dc3f8b
851d19c
aca51f7
2bfc8b8
9b3e7f3
57f43fd
766c94f
334827d
a7ea76c
92c1715
b9a177f
e4e02b7
2384066
dc64e0d
b67e6a1
68d8b7f
bd5ccc7
4ccfea0
61bdbe9
9e6cd6d
bf43d02
8c1e3f9
356009a
7afd3e7
5427ca1
c4f77c2
4c78bcb
f5a357a
fc4636f
58209ad
b16aae5
af64035
11919e0
dd85261
d16e3c9
f2b1683
824db90
3d38811
65cfd70
a5bfea9
a769bac
b787059
82f651b
d2b9691
8ad4a1b
4733b20
a369b5e
e3e0d5f
7a1e628
81954a6
c197ef8
f6ff87e
46ec701
0f9d1e1
ddb6153
bc0605f
74aedeb
27edb67
cf1ae8a
c0c0fb6
c3082d9
c639cad
9be2490
91bf088
bf5c438
3d01ba8
1dbf0f7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| #![cfg(all( | ||
| target_arch = "x86_64", | ||
| feature = "cpu_profile_generation", | ||
| feature = "kvm" | ||
| ))] | ||
| use anyhow::Context; | ||
| use clap::{Arg, Command}; | ||
| use std::io::BufWriter; | ||
|
|
||
| /// Entry point of the `generate-cpu-profile` tool. | ||
| /// | ||
| /// Parses the required `name` argument, obtains a hypervisor handle and passes a buffered, | ||
| /// locked stdout writer together with the profile name to `generate_profile_data`, whose | ||
| /// result becomes the return value of `main`. | ||
| fn main() -> anyhow::Result<()> { | ||
|     let name_arg = Arg::new("name") | ||
|         .help("The name to give the CPU profile") | ||
|         .num_args(1) | ||
|         .required(true); | ||
|     let matches = Command::new("generate-cpu-profile") | ||
|         .version(env!("CARGO_PKG_VERSION")) | ||
|         .arg_required_else_help(true) | ||
|         .arg(name_arg) | ||
|         .get_matches(); | ||
|
|
||
|     // `name` is declared as required above, so a value is expected to be present here. | ||
|     let profile_name = matches.get_one::<String>("name").unwrap(); | ||
|
|
||
|     let hypervisor = hypervisor::new().context("Could not obtain hypervisor")?; | ||
|     // TODO: Consider letting the user provide a file path as a target instead of writing to stdout. | ||
|     // The way it is now should be sufficient for a PoC however. | ||
|     let stdout_writer = BufWriter::new(std::io::stdout().lock()); | ||
|     arch::x86_64::cpu_profile_generation::generate_profile_data( | ||
|         stdout_writer, | ||
|         hypervisor.as_ref(), | ||
|         profile_name, | ||
|     ) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,12 +12,17 @@ | |
| extern crate log; | ||
|
|
||
| use std::collections::BTreeMap; | ||
| use std::str::FromStr; | ||
| use std::sync::Arc; | ||
| use std::{fmt, result}; | ||
|
|
||
| use serde::de::IntoDeserializer; | ||
| use serde::{Deserialize, Serialize}; | ||
| use thiserror::Error; | ||
|
|
||
| #[cfg(target_arch = "x86_64")] | ||
| pub use crate::x86_64::cpu_profile::CpuProfile; | ||
|
|
||
| type GuestMemoryMmap = vm_memory::GuestMemoryMmap<vm_memory::bitmap::AtomicBitmap>; | ||
| type GuestRegionMmap = vm_memory::GuestRegionMmap<vm_memory::bitmap::AtomicBitmap>; | ||
|
|
||
|
|
@@ -56,6 +61,31 @@ pub enum Error { | |
| /// Type for returning public functions outcome. | ||
| pub type Result<T> = result::Result<T, Error>; | ||
|
|
||
| // If the target_arch is x86_64 we import CpuProfile from the x86_64 module, otherwise we | ||
| // declare it here. | ||
| #[cfg(not(target_arch = "x86_64"))] | ||
| #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] | ||
| #[serde(rename_all = "kebab-case")] | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why kebab-case? it's fine, just asking
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I recall correctly that is what QEMU uses for its CPU models. I thought it would be a good idea to stay consistent. |
||
| /// A [`CpuProfile`] is a mechanism for ensuring live migration compatibility | ||
| /// between hosts with potentially different CPU models. | ||
| pub enum CpuProfile { | ||
| #[default] | ||
| Host, | ||
| } | ||
|
|
||
| /// Parses a [`CpuProfile`] from its serialized (kebab-case) name. | ||
| /// | ||
| /// Both a plain name (e.g. `host`) and the same name wrapped in a matching pair of double | ||
| /// quotes (e.g. `"host"`) are accepted. Anything else is handed to the deserializer | ||
| /// unchanged and is rejected there if it is not a known profile name. | ||
| impl FromStr for CpuProfile { | ||
|     type Err = serde::de::value::Error; | ||
|
|
||
|     fn from_str(s: &str) -> result::Result<Self, Self::Err> { | ||
|         // Strip the quotes only when they come as a pair. Falling back to `s` after each | ||
|         // individual strip would discard an already removed leading quote whenever the | ||
|         // trailing one is missing, and would accept a lone trailing quote. | ||
|         let normalized = s | ||
|             .strip_prefix('"') | ||
|             .and_then(|rest| rest.strip_suffix('"')) | ||
|             .unwrap_or(s); | ||
|         Self::deserialize(normalized.into_deserializer()) | ||
|     } | ||
| } | ||
|
|
||
| /// Type for memory region types. | ||
| #[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize, Deserialize)] | ||
| pub enum RegionType { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,239 @@ | ||
| use hypervisor::arch::x86::CpuIdEntry; | ||
| use hypervisor::{CpuVendor, HypervisorType}; | ||
| use serde::{Deserialize, Serialize}; | ||
|
|
||
| use crate::x86_64::CpuidReg; | ||
| use crate::x86_64::cpuid_definitions::{Parameters, deserialize_from_hex, serialize_as_hex}; | ||
|
|
||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] | ||
| #[serde(rename_all = "kebab-case")] | ||
| #[allow(non_camel_case_types)] | ||
| /// A [`CpuProfile`] is a mechanism for ensuring live migration compatibility | ||
| /// between hosts with potentially different CPU models. | ||
| /// | ||
| /// Variant names are (de)serialized in kebab-case, e.g. `sapphire-rapids`. | ||
| pub enum CpuProfile { | ||
| /// The default profile. `data` returns `None` for it, i.e. it has no pre-generated profile data. | ||
| #[default] | ||
| Host, | ||
| /// Profile whose data is embedded from `cpu_profiles/skylake.json`. | ||
| Skylake, | ||
| /// Profile whose data is embedded from `cpu_profiles/sapphire-rapids.json`. | ||
| SapphireRapids, | ||
| } | ||
|
|
||
| impl CpuProfile { | ||
|     /// Returns the pre-generated [`CpuProfileData`] for this profile, or `None` for | ||
|     /// [`CpuProfile::Host`]. | ||
|     /// | ||
|     /// When `amx` is `false` the AMX tile state components are wiped from the returned data: | ||
|     /// bits 17 and 18 are cleared from the leaf `0xd` replacements of sub-leaf 0 `EAX` and | ||
|     /// sub-leaf 1 `ECX`, and the replacements of sub-leaves 17 and 18 are set to zero. | ||
|     /// | ||
|     /// # Panics | ||
|     /// | ||
|     /// Panics (after logging the error) if the embedded JSON cannot be deserialized. | ||
|     // We can only generate CPU profiles for the KVM hypervisor for the time being. | ||
|     #[cfg(feature = "kvm")] | ||
|     pub(in crate::x86_64) fn data(&self, amx: bool) -> Option<CpuProfileData> { | ||
|         let raw_profile = match self { | ||
|             Self::Host => return None, | ||
|             Self::Skylake => include_bytes!("cpu_profiles/skylake.json").as_slice(), | ||
|             Self::SapphireRapids => { | ||
|                 include_bytes!("cpu_profiles/sapphire-rapids.json").as_slice() | ||
|             } | ||
|         }; | ||
|         let mut data: CpuProfileData = serde_json::from_slice(raw_profile) | ||
|             .inspect_err(|e| { | ||
|                 error!("BUG: could not deserialize CPU profile. Got error: {:?}", e) | ||
|             }) | ||
|             .expect("should be able to deserialize pre-generated data"); | ||
|
|
||
|         if !amx { | ||
|             // In this case we will need to wipe out the AMX tile state components (if they are included in the profile) | ||
|             let amx_tile_bits: u32 = (1 << 17) | (1 << 18); | ||
|             for (params, adjustment) in data.adjustments.iter_mut() { | ||
|                 let sub_leaf = *params.sub_leaf.start(); | ||
|                 // Only leaf 0xd adjustments that cover exactly one sub-leaf are touched. | ||
|                 if params.leaf != 0xd || sub_leaf != *params.sub_leaf.end() { | ||
|                     continue; | ||
|                 } | ||
|                 let is_sub_leaf_0_eax = sub_leaf == 0 && params.register == CpuidReg::EAX; | ||
|                 let is_sub_leaf_1_ecx = sub_leaf == 1 && params.register == CpuidReg::ECX; | ||
|                 if is_sub_leaf_0_eax || is_sub_leaf_1_ecx { | ||
|                     adjustment.replacements &= !amx_tile_bits; | ||
|                 } else if sub_leaf == 17 || sub_leaf == 18 { | ||
|                     adjustment.replacements = 0; | ||
|                 } | ||
|             } | ||
|         } | ||
|
|
||
|         Some(data) | ||
|     } | ||
|
|
||
|     /// Placeholder used when the `kvm` feature is disabled; calling it panics via `unimplemented!()`. | ||
|     // NOTE(review): unlike the `kvm` variant this one takes no `amx` parameter; confirm callers are gated accordingly. | ||
|     #[cfg(not(feature = "kvm"))] | ||
|     pub(in crate::x86_64) fn data(&self) -> Option<CpuProfileData> { | ||
|         unimplemented!() | ||
|     } | ||
| } | ||
|
|
||
| /// Every [`CpuProfile`] different from `Host` has associated [`CpuProfileData`]. | ||
| /// | ||
| /// New instances of this struct may only be generated through the CHV CLI (when built from source with | ||
| /// the `cpu_profile_generation` feature) which other hosts may then attempt to load in order to | ||
| /// increase the likelihood of successful live migrations among all hosts that opted in to the given | ||
| /// CPU profile. | ||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||
| #[allow(dead_code)] | ||
| pub struct CpuProfileData { | ||
| /// The hypervisor used when generating this CPU profile. | ||
| pub(in crate::x86_64) hypervisor: HypervisorType, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TIL this syntax |
||
| /// The vendor of the CPU belonging to the host that generated this CPU profile. | ||
| pub(in crate::x86_64) cpu_vendor: CpuVendor, | ||
| /// Adjustments necessary to become compatible with the desired target. | ||
| pub(in crate::x86_64) adjustments: Vec<(Parameters, CpuidOutputRegisterAdjustments)>, | ||
| } | ||
|
|
||
| /* TODO: The [`CpuProfile`] struct will likely need a few more iterations. The following | ||
| sections should explain why: | ||
|
|
||
| # MSR restrictions | ||
|
|
||
| CPU profiles also need to restrict which MSRs may be manipulated by the guest as various physical CPUs | ||
| can have differing supported MSRs. | ||
|
|
||
| The CPU profile will thus necessarily need to contain some data related to MSR restrictions. That will | ||
| be taken care of in a follow up MR. | ||
|
|
||
| # Raw hardware CPUID for advanced opt-in features | ||
|
|
||
| Some more advanced CPU Features may either not be present when prompting the hypervisor for supported CPUID | ||
| entries (especially if this is done with the hypervisor in its default configuration), or may otherwise be | ||
| declared to be overwritten by all CPU profiles (as a safest default). | ||
|
|
||
| We may still want to let users opt-in to using such features if permitted by the hardware and hypervisor | ||
| however. Hence we may also want the `CpuProfile` to contain all CPUID entries obtained directly from the | ||
| hardware of the host the profile was built from. | ||
|
|
||
| This hardware information can then later be used on other hosts running under this pre-generated CPU | ||
| profile whenever the user wants to opt-in to more advanced CPU features. If we can determine that the | ||
| feature is satisfied by both the hypervisor, the hardware of the host generating the profile, and the | ||
| current host then this should preserve live migration compatibility (unless the feature is inherently | ||
| incompatible with live migration of course). | ||
| */ | ||
|
|
||
| /// Used for adjusting an entire cpuid output register (EAX, EBX, ECX or EDX). | ||
| /// | ||
| /// The `adjust` method turns a register value into `(value & mask) | replacements`. | ||
| /// Both fields are (de)serialized through the `serialize_as_hex` / `deserialize_from_hex` helpers. | ||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] | ||
| pub(super) struct CpuidOutputRegisterAdjustments { | ||
| /// Bits that are OR-ed into the register after it has been masked. | ||
| #[serde(serialize_with = "serialize_as_hex")] | ||
| #[serde(deserialize_with = "deserialize_from_hex")] | ||
| pub(in crate::x86_64) replacements: u32, | ||
| /// Used to zero out the area `replacements` occupy: register bits that are cleared in this mask are dropped before `replacements` is applied, bits that are set are kept. This mask is not necessarily !replacements, as replacements may pack values of different types (i.e. it is wrong to think of it as a bitset conceptually speaking). | ||
| #[serde(serialize_with = "serialize_as_hex")] | ||
| #[serde(deserialize_with = "deserialize_from_hex")] | ||
| pub(in crate::x86_64) mask: u32, | ||
| } | ||
| impl CpuidOutputRegisterAdjustments { | ||
|     /// Rewrites `cpuid_output_register` in place: bits cleared in `mask` are dropped and | ||
|     /// `replacements` is then OR-ed into the result. | ||
|     pub(in crate::x86_64) fn adjust(self, cpuid_output_register: &mut u32) { | ||
|         let preserved_bits = *cpuid_output_register & self.mask; | ||
|         *cpuid_output_register = preserved_bits | self.replacements; | ||
|     } | ||
|
|
||
|     /// Applies `adjustments` to all four output registers of every entry in `cpuid`. | ||
|     /// | ||
|     /// For each register the first adjustment whose leaf, sub-leaf range and register match | ||
|     /// the entry is used; a register without a matching adjustment is zeroed out entirely. | ||
|     /// | ||
|     /// # Errors | ||
|     /// | ||
|     /// Returns [`MissingCpuidEntriesError`] (after logging every offending parameter set) when | ||
|     /// an adjustment with non-zero replacements has no entry in `cpuid` with a matching leaf | ||
|     /// and sub-leaf. Leaf `0xd` adjustments for sub-leaves 2..64 are exempt when the bit | ||
|     /// selecting that sub-leaf is clear in the already adjusted sub-leaves 0 and 1. | ||
|     pub(in crate::x86_64) fn adjust_cpuid_entries( | ||
|         mut cpuid: Vec<CpuIdEntry>, | ||
|         adjustments: &[(Parameters, Self)], | ||
|     ) -> Result<Vec<CpuIdEntry>, MissingCpuidEntriesError> { | ||
|         for entry in cpuid.iter_mut() { | ||
|             let registers = [ | ||
|                 (CpuidReg::EAX, &mut entry.eax), | ||
|                 (CpuidReg::EBX, &mut entry.ebx), | ||
|                 (CpuidReg::ECX, &mut entry.ecx), | ||
|                 (CpuidReg::EDX, &mut entry.edx), | ||
|             ]; | ||
|             for (reg, reg_value) in registers { | ||
|                 // Pick the adjustment matching the entry's function/leaf, index/sub-leaf and register. | ||
|                 // Without a match the trivial adjustment is used, which zeroes the register. | ||
|                 let adjustment = adjustments | ||
|                     .iter() | ||
|                     .find(|(param, _)| { | ||
|                         param.leaf == entry.function | ||
|                             && param.sub_leaf.contains(&entry.index) | ||
|                             && param.register == reg | ||
|                     }) | ||
|                     .map_or( | ||
|                         Self { | ||
|                             mask: 0, | ||
|                             replacements: 0, | ||
|                         }, | ||
|                         |(_, adjustment)| *adjustment, | ||
|                     ); | ||
|                 adjustment.adjust(reg_value); | ||
|             } | ||
|         } | ||
|
|
||
|         // Register values of leaf 0xd sub-leaves 0 and 1 (0 when the entry is absent). | ||
|         let leaf_0xd_sub_0 = cpuid | ||
|             .iter() | ||
|             .find(|entry| entry.function == 0xd && entry.index == 0); | ||
|         let leaf_0xd_sub_1 = cpuid | ||
|             .iter() | ||
|             .find(|entry| entry.function == 0xd && entry.index == 1); | ||
|         let eax_0xd_0 = leaf_0xd_sub_0.map_or(0, |entry| entry.eax); | ||
|         let edx_0xd_0 = leaf_0xd_sub_0.map_or(0, |entry| entry.edx); | ||
|         let ecx_0xd_1 = leaf_0xd_sub_1.map_or(0, |entry| entry.ecx); | ||
|         let edx_0xd_1 = leaf_0xd_sub_1.map_or(0, |entry| entry.edx); | ||
|
|
||
|         // Check that we found every value that was supposed to be replaced with something else than 0 | ||
|         let mut missing_entry = false; | ||
|         for (param, adjustment) in adjustments { | ||
|             if adjustment.replacements == 0 { | ||
|                 continue; | ||
|             } | ||
|             let sub_start = *param.sub_leaf.start(); | ||
|             let sub_end = *param.sub_leaf.end(); | ||
|             let is_leaf_0xd = param.leaf == 0xd; | ||
|
|
||
|             // In both cases below the sub-leaf is to be considered invalid anyway and it is OK | ||
|             // if we don't find it. | ||
|             let low_sub_leaf_invalid = is_leaf_0xd | ||
|                 && (2..32).contains(&sub_start) | ||
|                 && sub_start == sub_end | ||
|                 && ((1 << sub_start) & (eax_0xd_0 | ecx_0xd_1)) == 0; | ||
|             let high_sub_leaf_invalid = is_leaf_0xd | ||
|                 && (32..64).contains(&sub_start) | ||
|                 && ((1 << (sub_start - 32)) & (edx_0xd_0 | edx_0xd_1)) == 0; | ||
|             if low_sub_leaf_invalid || high_sub_leaf_invalid { | ||
|                 continue; | ||
|             } | ||
|
|
||
|             let has_matching_entry = cpuid.iter().any(|entry| { | ||
|                 entry.function == param.leaf && param.sub_leaf.contains(&entry.index) | ||
|             }); | ||
|             if !has_matching_entry { | ||
|                 error!( | ||
|                     "cannot adjust CPU profile. No entry found matching the required parameters: {:?}", | ||
|                     param | ||
|                 ); | ||
|                 missing_entry = true; | ||
|             } | ||
|         } | ||
|
|
||
|         if missing_entry { | ||
|             return Err(MissingCpuidEntriesError); | ||
|         } | ||
|         Ok(cpuid) | ||
|     } | ||
| } | ||
|
|
||
| #[derive(Debug)] | ||
| pub(in crate::x86_64) struct MissingCpuidEntriesError; | ||
|
|
||
| impl core::fmt::Display for MissingCpuidEntriesError { | ||
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
| f.write_str("Required CPUID entries not found") | ||
| } | ||
| } | ||
|
|
||
| impl core::error::Error for MissingCpuidEntriesError {} | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
optional = trueThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should always be required for
`x86_64` as it is also used when loading CPU profile data (not only generation). We can make it optional for other architectures where there is no concept of CPU profiles (yet) though. The alternative would be for the CLI to generate
`.rs` files instead which might be something to consider as a follow up task, but I think `serde_json` is good enough for the PoC.