diff --git a/src/bin/ch-remote.rs b/src/bin/ch-remote.rs
index 9e042ebe5b..c5c09f31a8 100644
--- a/src/bin/ch-remote.rs
+++ b/src/bin/ch-remote.rs
@@ -22,6 +22,7 @@ use clap::{Arg, ArgAction, ArgMatches, Command};
 use log::error;
 use option_parser::{ByteSized, ByteSizedParseError};
 use thiserror::Error;
+use vmm::api::VmMigrationProgressData;
 use vmm::config::RestoreConfig;
 use vmm::vm_config::{
     DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
@@ -301,6 +302,21 @@ fn rest_api_do_command(matches: &ArgMatches, socket: &mut UnixStream) -> ApiResu
         Some("shutdown") => {
             simple_api_command(socket, "PUT", "shutdown", None).map_err(Error::HttpApiClient)
         }
+        Some("migration-progress") => {
+            let clear = matches
+                .subcommand_matches("migration-progress")
+                .unwrap()
+                .get_one::<bool>("clear")
+                .copied();
+            let data = migration_progress(clear)?;
+            if clear.unwrap_or(false) {
+                simple_api_command(socket, "PUT", "migration-progress", Some(&data))
+                    .map_err(Error::HttpApiClient)
+            } else {
+                simple_api_command(socket, "GET", "migration-progress", Some(&data))
+                    .map_err(Error::HttpApiClient)
+            }
+        }
         Some("nmi") => simple_api_command(socket, "PUT", "nmi", None).map_err(Error::HttpApiClient),
         Some("resize") => {
             let resize = resize_config(
@@ -777,6 +793,13 @@ fn dbus_api_do_command(matches: &ArgMatches, proxy: &DBusApi1ProxyBlocking<'_>)
     }
 }
 
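+// Example invocations (illustrative, not part of the original patch):
+//
+//   ch-remote --api-socket /tmp/ch.sock migration-progress
+//   ch-remote --api-socket /tmp/ch.sock migration-progress --clear
+//
+// The first form fetches the snapshot via GET and leaves it in place; the
+// second sends a PUT with `{"clear": true}` so the VMM drops the snapshot.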
+fn migration_progress(clear: Option<bool>) -> Result<String, Error> {
+    let data = VmMigrationProgressData {
+        clear: clear.unwrap_or(false),
+    };
+    Ok(serde_json::to_string(&data).unwrap())
+}
+
 fn resize_config(
     cpus: Option<&str>,
     memory: Option<&str>,
@@ -1078,6 +1101,14 @@ fn get_cli_commands_sorted() -> Box<[Command]> {
         Command::new("ping").about("Ping the VMM to check for API server availability"),
         Command::new("power-button").about("Trigger a power button in the VM"),
         Command::new("reboot").about("Reboot the VM"),
+        Command::new("migration-progress")
+            .about("Fetch state about the ongoing migration")
+            .arg(
+                Arg::new("clear")
+                    .long("clear")
+                    .action(ArgAction::SetTrue)
+                    .help("Whether the latest snapshot should be cleared."),
+            ),
         Command::new("receive-migration")
             .about("Receive a VM migration")
             .arg(
diff --git a/vm-migration/src/lib.rs b/vm-migration/src/lib.rs
index daaa5d0d53..8519e60877 100644
--- a/vm-migration/src/lib.rs
+++ b/vm-migration/src/lib.rs
@@ -4,12 +4,14 @@
 //
 
 use anyhow::anyhow;
+pub use progress::MigrationPhase;
 use serde::{Deserialize, Serialize};
 use thiserror::Error;
 
 use crate::protocol::MemoryRangeTable;
 
 mod bitpos_iterator;
+pub mod progress;
 pub mod protocol;
 pub mod tls;
diff --git a/vm-migration/src/progress.rs b/vm-migration/src/progress.rs
new file mode 100644
index 0000000000..1b40d22f04
--- /dev/null
+++ b/vm-migration/src/progress.rs
@@ -0,0 +1,346 @@
+// Copyright © 2025 Cyberus Technology GmbH
+//
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module for reporting the live-migration progress.
+//!
+//! The main export is [`MigrationProgressAndStatus`].
+
+use std::error::Error;
+use std::num::NonZeroU32;
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
+
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
+pub enum TransportationMode {
+    Local,
+    Tcp { connections: NonZeroU32, tls: bool },
+}
+
+/// Carries information about the transmission of the VM's memory.
+#[derive(Clone, Copy, Debug, Default, serde::Serialize, serde::Deserialize)]
+pub struct MemoryTransmissionInfo {
+    /// The memory iteration (only in precopy mode).
+    pub memory_iteration: u64,
+    /// Memory bytes per second.
+    pub memory_transmission_bps: u64,
+    /// The total size of the VM's memory in bytes.
+    pub memory_bytes_total: u64,
+    /// The total number of transmitted bytes.
+    pub memory_bytes_transmitted: u64,
+    /// The number of remaining bytes for this iteration.
+    pub memory_bytes_remaining_iteration: u64,
+    /// The number of transmitted 4k pages.
+    pub memory_pages_4k_transmitted: u64,
+    /// The number of remaining 4k pages for this iteration.
+    pub memory_pages_4k_remaining_iteration: u64,
+    /// The number of pages for which we could take a shortcut because all
+    /// bytes have one fixed value (e.g., a zero page).
+    pub memory_pages_constant_count: u64,
+    /// Current memory dirty rate in pages per second.
+    pub memory_dirty_rate_pps: u64,
+}
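+
+/// Illustrative helper (editor's sketch, not part of the original patch):
+/// derives an overall progress percentage from a snapshot. Because
+/// `memory_bytes_transmitted` accumulates across iterations, retransmitted
+/// dirty pages can push the raw ratio above 100 percent, so it is clamped.
+pub fn progress_percent(info: &MemoryTransmissionInfo) -> f64 {
+    if info.memory_bytes_total == 0 {
+        // Nothing is known yet, e.g., before the precopy phase has started.
+        return 0.0;
+    }
+    let ratio = info.memory_bytes_transmitted as f64 / info.memory_bytes_total as f64;
+    (ratio * 100.0).min(100.0)
+}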
+
+/// The different phases of an ongoing migration (good case).
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
+pub enum MigrationPhase {
+    /// The migration starts. Handshake and transfer of VM config.
+    Starting,
+    /// Transfer of memory FDs.
+    ///
+    /// Only used for local migrations.
+    MemoryFds,
+    /// Transfer of VM memory in precopy mode.
+    ///
+    /// Not used for local migrations.
+    MemoryPrecopy,
+    /*/// Transfer of VM memory in postcopy mode.
+    ///
+    /// This follows after a precopy phase.
+    ///
+    /// Not used for local migrations.
+    MemoryPostcopy,*/
+    /// The VM migration is completing. This means the last chunks of memory
+    /// are transmitted as well as the final VM state (vCPUs, devices).
+    Completing,
+}
+
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
+pub enum MigrationProgressState {
+    /// The migration has been cancelled.
+    Cancelled {
+        /// The latest memory transmission info, if any.
+        memory_transmission_info: MemoryTransmissionInfo,
+    },
+    /// The migration has failed.
+    Failed {
+        /// The last memory transmission info, if any.
+        memory_transmission_info: MemoryTransmissionInfo,
+        /// Stringified error.
+        error_msg: String,
+        /// Debug-stringified error.
+        error_msg_debug: String,
+        // TODO this is very tricky because I need clone()
+        // error: Box<dyn Error>,
+    },
+    /// The migration has finished successfully.
+    Finished {
+        /// The last memory transmission info, if any.
+        memory_transmission_info: MemoryTransmissionInfo,
+    },
+    /// The migration is ongoing.
+    Ongoing {
+        phase: MigrationPhase,
+        memory_transmission_info: MemoryTransmissionInfo,
+        /// Percent in range `0..=100`.
+        vcpu_throttle_percent: u8,
+    },
+}
+
+impl MigrationProgressState {
+    fn memory_transmission_info(&self) -> MemoryTransmissionInfo {
+        match self {
+            MigrationProgressState::Cancelled {
+                memory_transmission_info,
+                ..
+            } => *memory_transmission_info,
+            MigrationProgressState::Failed {
+                memory_transmission_info,
+                ..
+            } => *memory_transmission_info,
+            MigrationProgressState::Finished {
+                memory_transmission_info,
+                ..
+            } => *memory_transmission_info,
+            MigrationProgressState::Ongoing {
+                memory_transmission_info,
+                ..
+            } => *memory_transmission_info,
+        }
+    }
+
+    fn state_name(&self) -> &'static str {
+        match self {
+            MigrationProgressState::Cancelled { .. } => "cancelled",
+            MigrationProgressState::Failed { .. } => "failed",
+            MigrationProgressState::Finished { .. } => "finished",
+            MigrationProgressState::Ongoing { .. } => "ongoing",
+        }
+    }
+
+    fn cpu_throttle_percent(&self) -> Option<u8> {
+        match self {
+            MigrationProgressState::Ongoing {
+                vcpu_throttle_percent,
+                ..
+            } => Some(*vcpu_throttle_percent),
+            _ => None,
+        }
+    }
+}
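+
+// Transition sketch (editor's note, derived from the panics in the methods of
+// `MigrationProgressAndStatus` below): a snapshot is created in `Ongoing` and
+// only ever leaves it for one of the terminal states.
+//
+//   Ongoing --update_ongoing_migration_state()--> Ongoing
+//   Ongoing --mark_as_cancelled()---------------> Cancelled (terminal)
+//   Ongoing --mark_as_failed()------------------> Failed    (terminal)
+//   Ongoing --mark_as_finished()----------------> Finished  (terminal)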
+
+/// Returns the current UNIX timestamp in ms.
+fn current_unix_timestamp_ms() -> u64 {
+    SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .unwrap()
+        .as_millis() as u64
+}
+
+/// Type holding a current snapshot of the progress and status information
+/// of an ongoing live migration.
+///
+/// The states correspond to the [live-migration protocol]. This type was
+/// specifically crafted with simple, clear semantics for API users in mind.
+///
+/// [live-migration protocol]: super::protocol
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
+pub struct MigrationProgressAndStatus {
+    /// UNIX timestamp of the start of the live-migration process.
+    pub timestamp_begin: u64,
+    /// UNIX timestamp of the current snapshot.
+    pub timestamp_snapshot: u64,
+    /// Configured target downtime.
+    pub downtime_ms_target: u64,
+    /// The currently expected downtime.
+    pub downtime_ms_expected: Option<u64>,
+    /// The requested transportation mode.
+    pub transportation_mode: TransportationMode,
+    /// Snapshot of the current phase.
+    pub state: MigrationProgressState,
+}
+
+impl MigrationProgressAndStatus {
+    pub fn new(transportation_mode: TransportationMode, target_downtime: Duration) -> Self {
+        let timestamp = current_unix_timestamp_ms();
+        Self {
+            timestamp_begin: timestamp,
+            timestamp_snapshot: timestamp,
+            downtime_ms_target: target_downtime.as_millis() as u64,
+            downtime_ms_expected: None,
+            transportation_mode,
+            state: MigrationProgressState::Ongoing {
+                phase: MigrationPhase::Starting,
+                memory_transmission_info: MemoryTransmissionInfo::default(),
+                vcpu_throttle_percent: 0,
+            },
+        }
+    }
+
+    /// Updates the state of an ongoing migration.
+    pub fn update_ongoing_migration_state(
+        &mut self,
+        phase: MigrationPhase,
+        latest_memory_transmission_info: Option<MemoryTransmissionInfo>,
+        latest_cpu_throttle_percent: Option<u8>,
+    ) {
+        if !matches!(self.state, MigrationProgressState::Ongoing { .. }) {
+            panic!(
+                "illegal state transition: {} -> ongoing",
+                self.state.state_name()
+            );
+        }
+
+        if let Some(cpu_throttle_percent) = latest_cpu_throttle_percent {
+            assert!(cpu_throttle_percent <= 100);
+        }
+
+        self.timestamp_snapshot = current_unix_timestamp_ms();
+        self.state = MigrationProgressState::Ongoing {
+            phase,
+            memory_transmission_info: latest_memory_transmission_info
+                .unwrap_or_else(|| self.state.memory_transmission_info()),
+            vcpu_throttle_percent: latest_cpu_throttle_percent
+                .or_else(|| self.state.cpu_throttle_percent())
+                .unwrap_or(0),
+        };
+    }
+
+    /// Sets the underlying state to [`MigrationProgressState::Cancelled`] and
+    /// updates all corresponding metadata.
+    ///
+    /// After this state change, the object should be treated as immutable.
+    pub fn mark_as_cancelled(&mut self) {
+        if !matches!(self.state, MigrationProgressState::Ongoing { .. }) {
+            panic!(
+                "illegal state transition: {} -> cancelled",
+                self.state.state_name()
+            );
+        }
+        self.timestamp_snapshot = current_unix_timestamp_ms();
+        self.state = MigrationProgressState::Cancelled {
+            memory_transmission_info: self.state.memory_transmission_info(),
+        };
+    }
+
+    /// Sets the underlying state to [`MigrationProgressState::Failed`] and
+    /// updates all corresponding metadata.
+    ///
+    /// After this state change, the object should be treated as immutable.
+    pub fn mark_as_failed(&mut self, error: &dyn Error) {
+        if !matches!(self.state, MigrationProgressState::Ongoing { .. }) {
+            panic!(
+                "illegal state transition: {} -> failed",
+                self.state.state_name()
+            );
+        }
+        self.timestamp_snapshot = current_unix_timestamp_ms();
+        self.state = MigrationProgressState::Failed {
+            memory_transmission_info: self.state.memory_transmission_info(),
+            error_msg: format!("{}", error),
+            error_msg_debug: format!("{:?}", error),
+        };
+    }
+
+    /// Sets the underlying state to [`MigrationProgressState::Finished`] and
+    /// updates all corresponding metadata.
+    ///
+    /// After this state change, the object should be treated as immutable.
+    pub fn mark_as_finished(&mut self) {
+        if !matches!(self.state, MigrationProgressState::Ongoing { .. }) {
+            panic!(
+                "illegal state transition: {} -> finished",
+                self.state.state_name()
+            );
+        }
+        self.timestamp_snapshot = current_unix_timestamp_ms();
+        self.state = MigrationProgressState::Finished {
+            memory_transmission_info: self.state.memory_transmission_info(),
+        };
+    }
+}
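+
+// Editor's note: with serde's default externally-tagged representation, an
+// ongoing snapshot serializes roughly as follows (values abridged):
+//
+//   {
+//     "timestamp_begin": 1700000000000,
+//     "timestamp_snapshot": 1700000001000,
+//     "downtime_ms_target": 100,
+//     "downtime_ms_expected": null,
+//     "transportation_mode": { "Tcp": { "connections": 1, "tls": false } },
+//     "state": { "Ongoing": { "phase": "MemoryPrecopy", ... } }
+//   }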
+
+#[cfg(test)]
+mod tests {
+    use anyhow::anyhow;
+
+    use super::*;
+
+    // Helpful to see what the API will look like.
+    #[test]
+    fn print_json() {
+        let starting = MigrationProgressAndStatus::new(
+            TransportationMode::Tcp {
+                connections: NonZeroU32::new(1).unwrap(),
+                tls: false,
+            },
+            Duration::from_millis(100),
+        );
+        let memory_precopy = {
+            let mut state = starting.clone();
+            state.update_ongoing_migration_state(
+                MigrationPhase::MemoryPrecopy,
+                Some(MemoryTransmissionInfo {
+                    memory_iteration: 7,
+                    memory_transmission_bps: 0,
+                    memory_bytes_total: 0x1337,
+                    memory_bytes_transmitted: 0x1337,
+                    memory_pages_4k_transmitted: 42,
+                    memory_pages_4k_remaining_iteration: 42,
+                    memory_bytes_remaining_iteration: 124,
+                    memory_dirty_rate_pps: 42,
+                    memory_pages_constant_count: 0,
+                }),
+                Some(42),
+            );
+            state
+        };
+        let completing = {
+            let mut state = memory_precopy.clone();
+            state.update_ongoing_migration_state(MigrationPhase::Completing, None, Some(99));
+            state
+        };
+        let completed = {
+            let mut state = completing.clone();
+            state.mark_as_finished();
+            state
+        };
+        let failed = {
+            let mut state = completing.clone();
+            let error = anyhow!("Some very bad error");
+            let error: &dyn Error = error.as_ref();
+            state.mark_as_failed(error);
+            state
+        };
+        let cancelled = {
+            let mut state = completing.clone();
+            state.mark_as_cancelled();
+            state
+        };
+
+        let vals = [
+            starting,
+            memory_precopy,
+            completing,
+            completed,
+            failed,
+            cancelled,
+        ];
+        for val in vals {
+            println!(
+                "{:?}:\n{}\n\n",
+                val,
+                serde_json::to_string_pretty(&val).unwrap()
+            );
+        }
+    }
+}
diff --git a/vmm/src/api/http/http_endpoint.rs b/vmm/src/api/http/http_endpoint.rs
index 35ef0ba946..9f1f348ee0 100644
--- a/vmm/src/api/http/http_endpoint.rs
+++ b/vmm/src/api/http/http_endpoint.rs
@@ -53,9 +53,10 @@ use crate::api::VmCoredump;
 use crate::api::http::{EndpointHandler, HttpError, error_response};
 use crate::api::{
     AddDisk, ApiAction, ApiError, ApiRequest, NetConfig, VmAddDevice, VmAddFs, VmAddNet, VmAddPmem,
-    VmAddUserDevice, VmAddVdpa, VmAddVsock, VmBoot, VmConfig, VmCounters, VmDelete, VmNmi, VmPause,
-    VmPowerButton, VmReboot, VmReceiveMigration, VmReceiveMigrationData, VmRemoveDevice, VmResize,
-    VmResizeDisk, VmResizeZone, VmRestore, VmResume, VmSendMigration, VmShutdown, VmSnapshot,
+    VmAddUserDevice, VmAddVdpa, VmAddVsock, VmBoot, VmConfig, VmCounters, VmDelete,
+    VmMigrationProgress, VmMigrationProgressData, VmNmi, VmPause, VmPowerButton, VmReboot,
+    VmReceiveMigration, VmReceiveMigrationData, VmRemoveDevice, VmResize, VmResizeDisk,
+    VmResizeZone, VmRestore, VmResume, VmSendMigration, VmShutdown, VmSnapshot,
 };
 use crate::config::{RestoreConfig, RestoredNetConfig};
 use crate::cpu::Error as CpuError;
@@ -412,13 +413,13 @@ impl PutHandler for VmSendMigration {
         )
         .map_err(HttpError::ApiError)?;
 
-        info!("live migration started");
+        debug!("live migration started");
 
         let (_, receiver) = &*ONGOING_LIVEMIGRATION;
-        info!("waiting for live migration result");
+        debug!("waiting for live migration result");
         let mig_res = receiver.lock().unwrap().recv().unwrap();
-        info!("received live migration result");
+        debug!("received live migration result");
 
         // We forward the migration error here to the guest
         mig_res
@@ -613,3 +614,60 @@ impl EndpointHandler for VmmShutdown {
         }
     }
 }
+
+impl EndpointHandler for VmMigrationProgress {
+    fn handle_request(
+        &self,
+        req: &Request,
+        api_notifier: EventFd,
+        api_sender: Sender<ApiRequest>,
+    ) -> Response {
+        match req.method() {
+            Method::Get => match crate::api::VmMigrationProgress
+                .send(
+                    api_notifier,
+                    api_sender,
+                    VmMigrationProgressData { clear: false },
+                )
+                .map_err(HttpError::ApiError)
+            {
+                Ok(info) => {
+                    let mut response = Response::new(Version::Http11, StatusCode::OK);
+                    let info_serialized = serde_json::to_string(&info).unwrap();
+
+                    response.set_body(Body::new(info_serialized));
+                    response
+                }
+                Err(e) => error_response(e, StatusCode::InternalServerError),
+            },
+            Method::Put => {
+                if req.body.is_none() {
+                    // A body was expected but none was provided.
+                    return error_response(HttpError::BadRequest, StatusCode::BadRequest);
+                }
+                let body = req.body.as_ref().unwrap();
+                let data: VmMigrationProgressData = match serde_json::from_slice(body.raw())
+                    .map_err(HttpError::SerdeJsonDeserialize)
+                {
+                    Ok(config) => config,
+                    Err(e) => return error_response(e, StatusCode::BadRequest),
+                };
+
+                match crate::api::VmMigrationProgress
+                    .send(api_notifier, api_sender, data)
+                    .map_err(HttpError::ApiError)
+                {
+                    Ok(info) => {
+                        let mut response = Response::new(Version::Http11, StatusCode::OK);
+                        let info_serialized = serde_json::to_string(&info).unwrap();
+
+                        response.set_body(Body::new(info_serialized));
+                        response
+                    }
+                    Err(e) => error_response(e, StatusCode::InternalServerError),
+                }
+            }
+            _ => error_response(HttpError::BadRequest, StatusCode::BadRequest),
+        }
+    }
+}
diff --git a/vmm/src/api/http/mod.rs b/vmm/src/api/http/mod.rs
index eaa18d311b..088047fc45 100644
--- a/vmm/src/api/http/mod.rs
+++ b/vmm/src/api/http/mod.rs
@@ -31,9 +31,9 @@ use self::http_endpoint::{VmActionHandler, VmCreate, VmInfo, VmmPing, VmmShutdow
 use crate::api::VmCoredump;
 use crate::api::{
     AddDisk, ApiError, ApiRequest, VmAddDevice, VmAddFs, VmAddNet, VmAddPmem, VmAddUserDevice,
-    VmAddVdpa, VmAddVsock, VmBoot, VmCounters, VmDelete, VmNmi, VmPause, VmPowerButton, VmReboot,
-    VmReceiveMigration, VmRemoveDevice, VmResize, VmResizeDisk, VmResizeZone, VmRestore, VmResume,
-    VmSendMigration, VmShutdown, VmSnapshot,
+    VmAddVdpa, VmAddVsock, VmBoot, VmCounters, VmDelete, VmMigrationProgress, VmNmi, VmPause,
+    VmPowerButton, VmReboot, VmReceiveMigration, VmRemoveDevice, VmResize, VmResizeDisk,
+    VmResizeZone, VmRestore, VmResume, VmSendMigration, VmShutdown, VmSnapshot,
 };
 use crate::landlock::Landlock;
 use crate::seccomp_filters::{Thread, get_seccomp_filter};
@@ -276,6 +276,10 @@ pub static HTTP_ROUTES: LazyLock<HttpRoutes> = LazyLock::new(|| {
         endpoint!("/vm.shutdown"),
         Box::new(VmActionHandler::new(&VmShutdown)),
     );
+    r.routes.insert(
+        endpoint!("/vm.migration-progress"),
+        Box::new(VmMigrationProgress {}),
+    );
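+    // Illustrative HTTP usage of the /vm.migration-progress route added above
+    // (editor's sketch; the socket path is an example):
+    //   curl --unix-socket /tmp/ch.sock \
+    //       'http://localhost/api/v1/vm.migration-progress'
+    //   curl --unix-socket /tmp/ch.sock -X PUT \
+    //       -H 'Content-Type: application/json' -d '{"clear": true}' \
+    //       'http://localhost/api/v1/vm.migration-progress'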
     r.routes.insert(
         endpoint!("/vm.snapshot"),
         Box::new(VmActionHandler::new(&VmSnapshot)),
diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs
index 73e352a11d..483052f89e 100644
--- a/vmm/src/api/mod.rs
+++ b/vmm/src/api/mod.rs
@@ -42,6 +42,7 @@ use micro_http::Body;
 use serde::{Deserialize, Serialize};
 use thiserror::Error;
 use vm_migration::MigratableError;
+use vm_migration::progress::MigrationProgressAndStatus;
 use vmm_sys_util::eventfd::EventFd;
 
 #[cfg(feature = "dbus_api")]
@@ -202,6 +203,10 @@ pub enum ApiError {
     /// Error triggering NMI
     #[error("Error triggering NMI")]
     VmNmi(#[source] VmError),
+
+    /// Error fetching the migration progress
+    #[error("Error fetching the migration progress")]
+    VmMigrationProgress(#[source] VmError),
 }
 
 pub type ApiResult<T> = Result<T, ApiError>;
@@ -296,6 +301,13 @@ pub struct VmSendMigrationData {
     pub tls_dir: Option<PathBuf>,
 }
 
+#[derive(Clone, Deserialize, Serialize, Debug)]
+pub struct VmMigrationProgressData {
+    /// Clear the latest snapshot.
+    #[serde(default)]
+    pub clear: bool,
+}
+
 // Default value for downtime the same as qemu.
 fn default_downtime() -> u64 {
     300
@@ -313,6 +325,9 @@ pub enum ApiResponsePayload {
     /// Virtual machine information
     VmInfo(VmInfoResponse),
 
+    /// The progress of a possibly ongoing live migration.
+    VmMigrationProgress(Option<MigrationProgressAndStatus>),
+
     /// Vmm ping response
     VmmPing(VmmPingResponse),
 
@@ -398,6 +413,11 @@ pub trait RequestHandler {
     ) -> Result<(), MigratableError>;
 
     fn vm_nmi(&mut self) -> Result<(), VmError>;
+
+    fn vm_migration_progress(
+        &mut self,
+        options: VmMigrationProgressData,
+    ) -> Result<Option<MigrationProgressAndStatus>, VmError>;
 }
 
 /// It would be nice if we could pass around an object like this:
@@ -1532,3 +1552,45 @@ impl ApiAction for VmNmi {
         get_response_body(self, api_evt, api_sender, data)
     }
 }
+
+pub struct VmMigrationProgress;
+
+impl ApiAction for VmMigrationProgress {
+    type RequestBody = VmMigrationProgressData;
+    type ResponseBody = Option<MigrationProgressAndStatus>;
+
+    fn request(
+        &self,
+        config: Self::RequestBody,
+        response_sender: Sender<ApiResponse>,
+    ) -> ApiRequest {
+        Box::new(move |vmm| {
+            info!("API request event: VmMigrationProgress");
+
+            let response = vmm
+                .vm_migration_progress(config)
+                .map(ApiResponsePayload::VmMigrationProgress)
+                .map_err(ApiError::VmMigrationProgress);
+
+            response_sender
+                .send(response)
+                .map_err(VmmError::ApiResponseSend)?;
+
+            Ok(false)
+        })
+    }
+
+    fn send(
+        &self,
+        api_evt: EventFd,
+        api_sender: Sender<ApiRequest>,
+        data: Self::RequestBody,
+    ) -> ApiResult<Self::ResponseBody> {
+        let info = get_response(self, api_evt, api_sender, data)?;
+
+        match info {
+            ApiResponsePayload::VmMigrationProgress(info) => Ok(info),
+            _ => Err(ApiError::ResponsePayloadType),
+        }
+    }
+}
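+
+// Illustrative in-process use of the action above (editor's sketch, not part
+// of the original patch); `api_evt` and `api_sender` are the API eventfd and
+// request channel that an API server thread already owns:
+//
+//   let progress = VmMigrationProgress
+//       .send(api_evt, api_sender, VmMigrationProgressData { clear: false })?;
+//   match progress {
+//       Some(p) => info!("migration state: {:?}", p.state),
+//       None => info!("no migration snapshot available"),
+//   }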
diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs
index e2fc557905..6a69321023 100644
--- a/vmm/src/lib.rs
+++ b/vmm/src/lib.rs
@@ -58,10 +58,14 @@ use vm_memory::{
     GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic, ReadVolatile,
     VolatileMemoryError, VolatileSlice, WriteVolatile,
 };
+use vm_migration::progress::{
+    MemoryTransmissionInfo, MigrationProgressAndStatus, TransportationMode,
+};
 use vm_migration::protocol::*;
 use vm_migration::tls::{TlsConnectionWrapper, TlsStream, TlsStreamWrapper};
 use vm_migration::{
-    Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, tls,
+    Migratable, MigratableError, MigrationPhase, Pausable, Snapshot, Snapshottable, Transportable,
+    tls,
 };
 use vmm_sys_util::eventfd::EventFd;
 use vmm_sys_util::signal::unblock_signal;
@@ -69,8 +73,8 @@ use vmm_sys_util::sock_ctrl_msg::ScmSocket;
 
 use crate::api::http::http_endpoint::ONGOING_LIVEMIGRATION;
 use crate::api::{
-    ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData,
-    VmSendMigrationData, VmmPingResponse,
+    ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmMigrationProgressData,
+    VmReceiveMigrationData, VmSendMigrationData, VmmPingResponse,
 };
 use crate::config::{RestoreConfig, add_to_config};
 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
@@ -267,6 +271,9 @@ impl From<u64> for EpollDispatch {
     }
 }
 
+// TODO make this a member of Vmm
+static MIGRATION_PROGRESS_SNAPSHOT: Mutex<Option<MigrationProgressAndStatus>> = Mutex::new(None);
+
 enum SocketStream {
     Unix(UnixStream),
     Tcp(TcpStream),
 }
@@ -680,6 +687,7 @@ impl VmmVersionInfo {
     }
 }
 
+/// Internal metrics for the memory transfer of an ongoing live migration.
 #[derive(Debug, Clone)]
 struct MigrationState {
     current_dirty_pages: u64,
@@ -2032,6 +2040,12 @@ impl Vmm {
     ) -> result::Result<(), MigratableError> {
         let mut bandwidth = 0.0;
         let mut iteration_table;
+        let total_memory_size_bytes = vm
+            .memory_range_table()?
+            .regions()
+            .iter()
+            .map(|range| range.length)
+            .sum::<u64>();
         loop {
             // todo: check if auto-converge is enabled at all?
@@ -2088,6 +2102,30 @@
             s.current_dirty_pages = s.pending_size.div_ceil(PAGE_SIZE as u64);
             s.total_transferred_dirty_pages += s.current_dirty_pages;
 
+            // Update migration progress snapshot
+            // TODO the first version alters this at the beginning of each iteration.
+            // We should do this continuously instead.
+            {
+                let mut lock = MIGRATION_PROGRESS_SNAPSHOT.lock().unwrap();
+                lock.as_mut()
+                    .expect("live migration should be ongoing")
+                    .update_ongoing_migration_state(
+                        MigrationPhase::MemoryPrecopy,
+                        Some(MemoryTransmissionInfo {
+                            memory_iteration: s.iteration,
+                            memory_transmission_bps: s.mb_per_sec as u64 * 1024 * 1024,
+                            memory_bytes_total: total_memory_size_bytes,
+                            memory_bytes_transmitted: s.total_transferred_bytes,
+                            memory_pages_4k_transmitted: s.total_transferred_dirty_pages,
+                            memory_pages_4k_remaining_iteration: s.current_dirty_pages,
+                            memory_bytes_remaining_iteration: s.pending_size,
+                            memory_dirty_rate_pps: 0, /* TODO */
+                            memory_pages_constant_count: 0,
+                        }),
+                        Some(vm.throttle_percent()),
+                    );
+            }
+
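+            // Editor's sketch for the `memory_dirty_rate_pps` TODO above (not
+            // part of the original patch): derive the rate from the pages
+            // dirtied in this iteration and the time since the previous scan,
+            // e.g. with a hypothetical `last_scan: Instant` tracked in
+            // `MigrationState`:
+            //   memory_dirty_rate_pps: (s.current_dirty_pages as f64
+            //       / s.last_scan.elapsed().as_secs_f64()) as u64,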
             // Send the current dirty pages
             let transfer_start = Instant::now();
             mem_send.send_memory(&iteration_table, socket)?;
@@ -2202,6 +2240,25 @@ impl Vmm {
         >,
         send_data_migration: VmSendMigrationData,
     ) -> result::Result<(), MigratableError> {
+        // Update migration progress snapshot
+        {
+            let mut lock = MIGRATION_PROGRESS_SNAPSHOT.lock().unwrap();
+            // if this fails, we made a programming error in our state handling
+            assert!(lock.is_none());
+            let transportation_mode = if send_data_migration.local {
+                TransportationMode::Local
+            } else {
+                TransportationMode::Tcp {
+                    connections: send_data_migration.connections,
+                    tls: send_data_migration.tls_dir.is_some(),
+                }
+            };
+            lock.replace(MigrationProgressAndStatus::new(
+                transportation_mode,
+                Duration::from_millis(send_data_migration.downtime),
+            ));
+        }
+
         let mut s = MigrationState::new();
 
         // Set up the socket connection
@@ -2246,6 +2303,11 @@
         if send_data_migration.local {
             match &mut socket {
                 SocketStream::Unix(unix_socket) => {
+                    let mut lock = MIGRATION_PROGRESS_SNAPSHOT.lock().unwrap();
+                    lock.as_mut()
+                        .expect("live migration should be ongoing")
+                        .update_ongoing_migration_state(MigrationPhase::MemoryFds, None, None);
+
                     // Proceed with sending memory file descriptors over UNIX socket
                     vm.send_memory_fds(unix_socket)?;
                 }
@@ -2286,6 +2348,14 @@
             vm.pause()?;
         } else {
             Self::do_memory_migration(vm, &mut socket, &mut s, &send_data_migration)?;
+
+            // Update migration progress snapshot
+            {
+                let mut lock = MIGRATION_PROGRESS_SNAPSHOT.lock().unwrap();
+                lock.as_mut()
+                    .expect("live migration should be ongoing")
+                    .update_ongoing_migration_state(MigrationPhase::Completing, None, None);
+            }
         }
 
         // We release the locks early to enable locking them on the destination host.
@@ -2331,10 +2401,26 @@
         // Record total migration time
         s.total_time = s.start_time.elapsed();
-        info!("Migration complete");
+        info!("Migration complete: {}s", s.total_time.as_secs_f32());
+
+        // Update migration progress snapshot
+        {
+            let mut lock = MIGRATION_PROGRESS_SNAPSHOT.lock().unwrap();
+            lock.as_mut()
+                .expect("live migration should be ongoing")
+                .mark_as_finished();
+        }
 
         // Let every Migratable object know about the migration being complete
-        vm.complete_migration()
+        vm.complete_migration()?;
+
+        // Give management software a chance to fetch the migration state.
+        info!("Sleeping five seconds before shutting off.");
+        // TODO right now, the http-server is single-threaded and the blocking
+        // start-migration API call will block other requests here.
+        thread::sleep(Duration::from_secs(5));
+
+        Ok(())
     }
 
     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
@@ -2483,6 +2569,14 @@ impl Vmm {
                 }
             }
             Err(e) => {
+                // Update migration progress snapshot
+                {
+                    let mut lock = MIGRATION_PROGRESS_SNAPSHOT.lock().unwrap();
+                    lock.as_mut()
+                        .expect("live migration should be ongoing")
+                        .mark_as_failed(&e);
+                }
+
                 error!("Migration failed: {}", e);
                 {
                     info!("Sending Receiver in HTTP thread that migration failed");
@@ -3483,6 +3577,19 @@ impl RequestHandler for Vmm {
 
         Ok(())
     }
+
+    fn vm_migration_progress(
+        &mut self,
+        options: VmMigrationProgressData,
+    ) -> result::Result<Option<MigrationProgressAndStatus>, VmError> {
+        let mut lock = MIGRATION_PROGRESS_SNAPSHOT.lock().unwrap();
+        let snapshot = if options.clear {
+            lock.take()
+        } else {
+            lock.as_ref().cloned()
+        };
+        Ok(snapshot)
+    }
 }
 
 const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager";
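+// Editor's sketch of the `vm_migration_progress` clear semantics above (not
+// part of the original patch): a request with `clear: true` takes the snapshot
+// out of the mutex, so a subsequent fetch returns `None` until the next
+// migration is started.
+//
+//   let _ = vmm.vm_migration_progress(VmMigrationProgressData { clear: true })?;
+//   let second = vmm.vm_migration_progress(VmMigrationProgressData { clear: false })?;
+//   assert!(second.is_none());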