Skip to content

Commit cb0d7ce

Browse files
authored
fast_memcpy: add memcpy implementation for openhcl_vmm (#2297)
`openhcl_vmm` has a lot of code that depends on memcpy being fast, but musl's memcpy on x86_64 is often slow. Write a generic memcpy in Rust and rely on LLVM to do a good job optimizing it.
1 parent df1d615 commit cb0d7ce

File tree

7 files changed

+443
-0
lines changed

7 files changed

+443
-0
lines changed

Cargo.lock

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1664,6 +1664,13 @@ version = "0.1.9"
16641664
source = "registry+https://github.com/rust-lang/crates.io-index"
16651665
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
16661666

1667+
[[package]]
1668+
name = "fast_memcpy"
1669+
version = "0.0.0"
1670+
dependencies = [
1671+
"criterion",
1672+
]
1673+
16671674
[[package]]
16681675
name = "fast_select"
16691676
version = "0.0.0"
@@ -8089,6 +8096,7 @@ name = "underhill_entry"
80898096
version = "0.0.0"
80908097
dependencies = [
80918098
"anyhow",
8099+
"fast_memcpy",
80928100
"mimalloc",
80938101
"openssl_crypto_only",
80948102
"underhill_core",

Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ console_relay = { path = "support/console_relay" }
103103
safe_intrinsics = { path = "support/safe_intrinsics" }
104104
debug_ptr = { path = "support/debug_ptr" }
105105
user_driver_emulated_mock = { path = "vm/devices/user_driver_emulated_mock" }
106+
fast_memcpy = { path = "support/fast_memcpy" }
106107
fast_select = { path = "support/fast_select" }
107108
fdt = { path = "support/fdt" }
108109
guid = { path = "support/guid" }
@@ -616,6 +617,11 @@ opt-level = 3
616617
[profile.dev.package.generator]
617618
opt-level = 3
618619

620+
# memcpy needs to be optimized heavily regardless of the build profile
621+
[profile.dev.package.fast_memcpy]
622+
opt-level = 3
623+
overflow-checks = false
624+
619625
[patch.crates-io]
620626
# Pending <https://github.com/ferrilab/bitvec/pull/273>
621627
bitvec = { git = "https://github.com/smalis-msft/bitvec", branch = "set-aliased-previous-val" }

openhcl/underhill_entry/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ underhill_dump.workspace = true
2525
openssl_crypto_only.workspace = true
2626

2727
anyhow.workspace = true
28+
fast_memcpy = { workspace = true, features = ["replace_system_memcpy"] }
2829
mimalloc.workspace = true
2930

3031
[lints]

openhcl/underhill_entry/src/lib.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,13 @@
1010
#[global_allocator]
1111
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
1212

13+
// musl's memcpy implementation is slow on x86_64, so we use the `fast_memcpy` crate to
14+
// provide an optimized implementation.
15+
//
16+
// xtask-fmt allow-target-arch sys-crate
17+
#[cfg(target_arch = "x86_64")]
18+
use fast_memcpy as _;
19+
1320
// OpenVMM-HCL only needs libcrypto from openssl, not libssl.
1421
#[cfg(target_os = "linux")]
1522
openssl_crypto_only::openssl_crypto_only!();

support/fast_memcpy/Cargo.toml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
4+
[package]
5+
name = "fast_memcpy"
6+
rust-version.workspace = true
7+
edition.workspace = true
8+
9+
[features]
10+
# Export `memcpy` and `memmove`, replacing the system-provided implementation.
11+
replace_system_memcpy = []
12+
13+
[dependencies]
14+
15+
[dev-dependencies]
16+
criterion.workspace = true
17+
18+
[[bench]]
19+
name = "perf"
20+
harness = false
21+
22+
[lints]
23+
workspace = true
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
//! Performance tests.
5+
6+
// UNSAFETY: testing unsafe code.
7+
#![expect(unsafe_code)]
8+
#![expect(missing_docs)]
9+
10+
use criterion::BenchmarkId;
11+
12+
criterion::criterion_main!(benches);
13+
14+
criterion::criterion_group!(benches, bench_memcpy);
15+
16+
fn bench_memcpy(c: &mut criterion::Criterion) {
17+
unsafe extern "C" {
18+
fn memcpy(dest: *mut u8, src: *const u8, len: usize) -> *mut u8;
19+
}
20+
do_bench_memcpy(c.benchmark_group("fast_memcpy"), fast_memcpy::memcpy);
21+
do_bench_memcpy(c.benchmark_group("system_memcpy"), memcpy);
22+
}
23+
24+
fn do_bench_memcpy(
25+
mut group: criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
26+
memcpy_fn: unsafe extern "C" fn(*mut u8, *const u8, usize) -> *mut u8,
27+
) {
28+
for &len in &[
29+
1usize, 2, 3, 4, 7, 8, 12, 24, 32, 48, 64, 256, 1024, 4096, 8000,
30+
] {
31+
group.bench_function(BenchmarkId::new("len", len), |b| {
32+
let src = vec![0u8; len];
33+
let mut dest = vec![0u8; len];
34+
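// SAFETY: `src` and `dest` are separate, live `Vec<u8>` allocations of exactly `len` bytes each, so the copy stays in bounds and the regions do not overlap.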
35+
b.iter(|| unsafe {
36+
memcpy_fn(
37+
core::hint::black_box(dest.as_mut_ptr()),
38+
core::hint::black_box(src.as_ptr()),
39+
core::hint::black_box(len),
40+
)
41+
});
42+
});
43+
}
44+
}

0 commit comments

Comments
 (0)