Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 22 additions & 5 deletions crates/common/src/chain_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,31 @@ use crate::helpers::system_start_to_epoch_millis;
use pallas_network::miniprotocols::localstate::queries_v16::{CurrentProtocolParam, GenesisConfig};
use serde::Serialize;

/// This structure is used to share server-wide cachables
/// Cached chain configuration queried from the Cardano node at startup and
/// refreshed at epoch boundaries by [`ChainConfigWatch`].
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doc comment uses an intra-doc link to ChainConfigWatch, but that type lives in bf_node and isn’t in scope in the bf_common crate. This can produce broken intra-doc-link warnings during cargo doc/docs.rs. Consider removing the link formatting or referencing it as plain text (or using an external URL) to avoid unresolved links.

Suggested change
/// refreshed at epoch boundaries by [`ChainConfigWatch`].
/// refreshed at epoch boundaries by `ChainConfigWatch`.

Copilot uses AI. Check for mistakes.
pub struct ChainConfigCache {
/// Shelley genesis configuration (network magic, system start, epoch length, etc.).
pub genesis_config: GenesisConfig,
/// Current protocol parameters (fees, execution unit prices, cost models, etc.).
/// May change at epoch boundaries via governance actions.
pub protocol_params: CurrentProtocolParam,
/// Slot timing derived from genesis.
pub slot_config: SlotConfig,
/// Current Cardano era index (see [`Self::CONWAY_ERA`]).
pub era: u16,
}

impl ChainConfigCache {
/// Conway era index in the Cardano era sequence used by Ouroboros:
/// Byron=0, Shelley=1, Allegra=2, Mary=3, Alonzo=4, Babbage=5, Conway=6.
///
/// This is an application policy constant, not a runtime-discoverable value.
/// The actual era is queried from the node via `get_current_era()` — this
/// constant defines the minimum era we require. A future era bump will
/// require code changes well beyond this constant (CBOR codecs, protocol
/// params, pallas, testgen-hs), so hardcoding is intentional.
pub const CONWAY_ERA: u16 = 6;

pub fn new(
genesis_config: GenesisConfig,
protocol_params: CurrentProtocolParam,
Expand All @@ -21,20 +37,21 @@ impl ChainConfigCache {
genesis_config,
protocol_params,
slot_config,
// The era number in the Cardano era sequence used by Ogmios/testgen-hs:
// Byron=0, Shelley=1, Allegra=2, Mary=3, Alonzo=4, Babbage=5, Conway=6.
// Hardcoded to Conway (6) since that is the only era we currently support.
era: 6,
era: Self::CONWAY_ERA,
})
}
}

/// Slot/epoch timing parameters derived from the Shelley genesis
/// configuration, used to convert between slot numbers and wall-clock time.
#[derive(Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SlotConfig {
    /// Duration of a single slot in milliseconds (e.g. 1000 for all known networks).
    pub slot_length: u64,
    /// Absolute slot number at the start of the Shelley era (Byron/Shelley transition point).
    pub zero_slot: u64,
    /// Unix timestamp in milliseconds corresponding to `zero_slot`.
    pub zero_time: u64,
    /// Number of slots per epoch.
    pub epoch_length: u64,
}

Expand Down
11 changes: 11 additions & 0 deletions crates/common/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,16 @@ impl BlockfrostError {
}
}

/// error for 503 Service Unavailable
/// returned when a feature is not ready or available
pub fn service_unavailable(message: String) -> Self {
Self {
error: "Service Unavailable".to_string(),
message,
status_code: 503,
}
}

/// This error is converted in middleware to internal_server_error_user
pub fn internal_server_error(error: String) -> Self {
Self {
Expand Down Expand Up @@ -269,6 +279,7 @@ impl IntoResponse for BlockfrostError {
404 => StatusCode::NOT_FOUND,
405 => StatusCode::METHOD_NOT_ALLOWED,
500 => StatusCode::INTERNAL_SERVER_ERROR,
503 => StatusCode::SERVICE_UNAVAILABLE,
_ => StatusCode::INTERNAL_SERVER_ERROR,
};

Expand Down
9 changes: 4 additions & 5 deletions crates/node/src/chain_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@ async fn init_genesis_config(
node_pool: NodePool,
) -> Result<(GenesisConfig, CurrentProtocolParam), AppError> {
let mut node = node_pool.get().await?;
match node.genesis_config_and_pp().await {
Ok((genesis_config, protocol_params)) => Ok((genesis_config, protocol_params)),
Err(e) => Err(AppError::Server(format!(
node.genesis_config_and_pp().await.map_err(|e| {
AppError::Server(format!(
"Could not fetch genesis and protocol parameters. Is the Cardano node running? {e}"
))),
}
))
})
}
283 changes: 283 additions & 0 deletions crates/node/src/chain_config_watch.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};

use bf_common::chain_config::ChainConfigCache;
use bf_common::errors::BlockfrostError;
use tokio::sync::watch;
use tokio::time;

use crate::chain_config::init_caches;
use crate::pool::NodePool;

/// Era index the node must report before the config is first loaded (Conway).
const CONWAY_ERA: u16 = ChainConfigCache::CONWAY_ERA;
/// Minimum sync progress (percent) required before attempting init.
const SYNC_THRESHOLD: f64 = 99.9;
/// How often to re-probe sync status while waiting for the node.
const SYNC_POLL_INTERVAL: Duration = Duration::from_secs(60);
/// How long to wait before retrying a failed cache (re)initialization.
const RETRY_INTERVAL: Duration = Duration::from_secs(60);
/// Buffer after computed epoch boundary before re-querying protocol params.
const EPOCH_BOUNDARY_BUFFER: Duration = Duration::from_secs(30);
/// Error substring returned by `sync_progress()` for non-well-known networks.
const UNSUPPORTED_NETWORK_ERROR: &str = "Only well-known networks";

/// Watches the Cardano node for sync readiness and epoch-boundary protocol
/// parameter changes, publishing [`ChainConfigCache`] updates via a
/// `tokio::sync::watch` channel.
#[derive(Clone)]
pub struct ChainConfigWatch {
    // Receiver side of the watch channel; holds `None` until the first
    // successful init, then always the latest published config.
    rx: watch::Receiver<Option<Arc<ChainConfigCache>>>,
}

impl ChainConfigWatch {
/// Spawn the background monitor and return immediately.
///
/// The watch value starts as `None` and is set to `Some(…)` once the node
/// reaches the Conway era with sufficient sync progress.
pub fn spawn(node_pool: NodePool) -> Self {
let (tx, rx) = watch::channel(None);

tokio::spawn(async move {
monitor_loop(node_pool, tx).await;
});

Self { rx }
}

/// Returns the current config or a 503 if the node is not yet synced.
pub fn get(&self) -> Result<Arc<ChainConfigCache>, BlockfrostError> {
self.rx.borrow().clone().ok_or_else(|| {
BlockfrostError::service_unavailable(
"Chain configuration is not yet available. The Cardano node may still be syncing."
.to_string(),
)
})
}

/// Wait until the first config is available (node synced and init complete).
pub async fn wait_ready(&mut self) {
while self.rx.borrow().is_none() {
if self.rx.changed().await.is_err() {
break;
Comment on lines +57 to +58
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wait_ready returns silently if the watch sender is dropped (e.g., the background task panics/exits) even though the config is still None, which contradicts the method’s intent and can make tests proceed with an unready app. Consider returning a Result/error on channel closure (or at least logging) so callers can fail fast instead of continuing without config.

Suggested change
if self.rx.changed().await.is_err() {
break;
match self.rx.changed().await {
Ok(_) => {
// value changed; loop condition will re-check for readiness
}
Err(err) => {
// Sender was dropped before config became available; this indicates
// that the background task has terminated and readiness will never be reached.
tracing::error!(
"ChainConfigWatch: watch channel closed before configuration became available: {}",
err
);
break;
}

Copilot uses AI. Check for mistakes.
}
}
}
}

/// The main background loop: wait for sync, init, then watch for epoch changes.
///
/// Never returns; it runs for the lifetime of the spawned task.
async fn monitor_loop(node_pool: NodePool, tx: watch::Sender<Option<Arc<ChainConfigCache>>>) {
    // Phase 1 – wait until the node is synced (or ready enough).
    wait_for_sync(&node_pool).await;

    // Phase 2 – first successful init.
    let config = init_until_success(&node_pool).await;
    // Captured once for scheduling. Slot timing is derived from genesis, so it
    // is presumably immutable — NOTE(review): confirm; refreshed configs below
    // do not update this copy.
    let slot_config = config.slot_config.clone();
    // `send` only fails when every receiver is gone; deliberately ignored.
    let _ = tx.send(Some(Arc::new(config)));
    tracing::info!("ChainConfigWatch: chain configuration loaded successfully");

    // Phase 3 – watch for epoch changes.
    loop {
        let sleep_dur = duration_until_next_epoch(&slot_config);
        tracing::info!(
            sleep_secs = sleep_dur.as_secs(),
            "ChainConfigWatch: next epoch boundary check scheduled"
        );
        time::sleep(sleep_dur).await;

        // Re-query the node until the refresh succeeds; the previously
        // published config stays active the whole time.
        let new_config = loop {
            match init_caches(node_pool.clone()).await {
                Ok(c) => break c,
                Err(e) => {
                    tracing::error!(
                        "ChainConfigWatch: failed to refresh chain config at epoch boundary: {e}. \
                        Current config remains active, retrying in {}s. \
                        If this persists, check your Cardano node connectivity.",
                        RETRY_INTERVAL.as_secs()
                    );
                    time::sleep(RETRY_INTERVAL).await;
                },
            }
        };

        // Only publish (and wake all watchers) when the protocol parameters
        // actually differ from the currently published value.
        let params_changed = tx
            .borrow()
            .as_ref()
            .is_none_or(|old| old.protocol_params != new_config.protocol_params);

        if params_changed {
            tracing::info!(
                "ChainConfigWatch: protocol parameters changed at epoch boundary — reloading chain config"
            );
            let _ = tx.send(Some(Arc::new(new_config)));
        } else {
            tracing::debug!(
                "ChainConfigWatch: epoch boundary reached, protocol parameters unchanged — no action needed"
            );
        }
    }
}

/// Block until the node reports Conway era and sync progress ≥ threshold.
///
/// For custom (non-well-known) networks, `sync_progress()` is not available, so
/// we skip the sync check and rely on `init_caches` succeeding.
async fn wait_for_sync(node_pool: &NodePool) {
    loop {
        // Extract the not-ready reason, returning early for terminal states.
        let reason = match check_sync(node_pool).await {
            SyncStatus::Ready => return,
            SyncStatus::CustomNetwork => {
                tracing::info!(
                    "ChainConfigWatch: custom network detected — skipping sync check, will attempt init directly"
                );
                return;
            },
            SyncStatus::NotReady(reason) => reason,
        };

        tracing::info!(
            "ChainConfigWatch: {reason}, retrying in {}s",
            SYNC_POLL_INTERVAL.as_secs()
        );
        time::sleep(SYNC_POLL_INTERVAL).await;
    }
}

/// Outcome of a single readiness probe against the node.
enum SyncStatus {
    /// Node is in the Conway era and synced past the threshold.
    Ready,
    /// Node is not usable yet; payload is a human-readable reason for logging.
    NotReady(String),
    /// `sync_progress()` is unavailable (non-well-known network magic).
    CustomNetwork,
}

/// Probe the node once and classify its readiness.
///
/// Checks, in order: pool connectivity, era, then sync percentage.
async fn check_sync(node_pool: &NodePool) -> SyncStatus {
    // A pool failure means we cannot even reach the node's socket.
    let mut node = match node_pool.get().await {
        Ok(n) => n,
        Err(e) => {
            return SyncStatus::NotReady(format!(
                "Cannot connect to Cardano node: {e}. \
                Ensure node is running and socket path is correct"
            ));
        },
    };

    match node.sync_progress().await {
        Ok(info) => {
            // NOTE(review): this is an exact-era match, so a node in a future
            // era (> Conway) also reports NotReady forever. Confirm this is
            // the intended policy — `ChainConfigCache::CONWAY_ERA` is
            // documented elsewhere as the *minimum* required era.
            if info.era != CONWAY_ERA {
                SyncStatus::NotReady(format!(
                    "Node is in era {} (need Conway era {CONWAY_ERA})",
                    info.era
                ))
            } else if info.sync_progress < SYNC_THRESHOLD {
                SyncStatus::NotReady(format!(
                    "Node sync progress {:.2}% < {SYNC_THRESHOLD}% threshold",
                    info.sync_progress
                ))
            } else {
                SyncStatus::Ready
            }
        },
        Err(e) => {
            // `sync_progress()` only supports well-known network magics; the
            // node signals a custom network via this error substring.
            let msg = e.to_string();
            if msg.contains(UNSUPPORTED_NETWORK_ERROR) {
                SyncStatus::CustomNetwork
            } else {
                SyncStatus::NotReady(format!(
                    "Cannot query node sync status: {e}. \
                    Ensure the Cardano node is running"
                ))
            }
        },
    }
}

/// Retry `init_caches` until it succeeds, logging each failed attempt.
async fn init_until_success(node_pool: &NodePool) -> ChainConfigCache {
    // The loop is the function's tail expression; `break` yields the value.
    loop {
        let attempt = init_caches(node_pool.clone()).await;
        match attempt {
            Ok(config) => break config,
            Err(e) => {
                tracing::warn!(
                    "ChainConfigWatch: failed to load chain config from node: {e}. \
                    Retrying in {}s.",
                    RETRY_INTERVAL.as_secs()
                );
                time::sleep(RETRY_INTERVAL).await;
            },
        }
    }
}

/// Compute how long to sleep until the next epoch boundary (plus buffer).
///
/// Returns [`RETRY_INTERVAL`] as a fallback if `slot_config` contains zero
/// values that would cause a division-by-zero.
fn duration_until_next_epoch(slot_config: &bf_common::chain_config::SlotConfig) -> Duration {
if slot_config.slot_length == 0 || slot_config.epoch_length == 0 {
return RETRY_INTERVAL;
}

let now_ms = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as u64;

// Current slot (may be approximate but close enough for scheduling).
let elapsed_ms = now_ms.saturating_sub(slot_config.zero_time);
let current_slot = slot_config.zero_slot + elapsed_ms / slot_config.slot_length;

// Slots into the current epoch.
let slot_in_epoch = (current_slot - slot_config.zero_slot) % slot_config.epoch_length;
let slots_until_next = slot_config.epoch_length - slot_in_epoch;
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

duration_until_next_epoch waits a full epoch when slot_in_epoch == 0 (exactly on an epoch boundary), because slots_until_next becomes epoch_length. That can delay the epoch-boundary refresh by an entire epoch in the boundary case. Consider computing slots_until_next so that a boundary yields 0 (or 1) and then adding the buffer, e.g. (epoch_length - slot_in_epoch) % epoch_length.

Suggested change
let slots_until_next = slot_config.epoch_length - slot_in_epoch;
// Compute slots until next epoch boundary; if we're exactly on a boundary,
// this yields 0 instead of a full epoch.
let slots_until_next =
(slot_config.epoch_length - slot_in_epoch) % slot_config.epoch_length;

Copilot uses AI. Check for mistakes.

let ms_until_next = slots_until_next * slot_config.slot_length;
Duration::from_millis(ms_until_next) + EPOCH_BOUNDARY_BUFFER
}

#[cfg(test)]
mod tests {
    use super::*;
    use bf_common::chain_config::SlotConfig;

    // Degenerate slot length must fall back to RETRY_INTERVAL rather than
    // dividing by zero.
    #[test]
    fn test_zero_slot_length_returns_retry_interval() {
        let config = SlotConfig {
            slot_length: 0,
            zero_slot: 0,
            zero_time: 0,
            epoch_length: 432000,
        };
        assert_eq!(duration_until_next_epoch(&config), RETRY_INTERVAL);
    }

    // Degenerate epoch length must likewise fall back to RETRY_INTERVAL.
    #[test]
    fn test_zero_epoch_length_returns_retry_interval() {
        let config = SlotConfig {
            slot_length: 1000,
            zero_slot: 0,
            zero_time: 0,
            epoch_length: 0,
        };
        assert_eq!(duration_until_next_epoch(&config), RETRY_INTERVAL);
    }

    // The post-boundary safety buffer is always included in the result.
    // (Uses the real "preview" network config and the current wall clock.)
    #[test]
    fn test_result_includes_buffer() {
        let result = duration_until_next_epoch(&SlotConfig::preview());
        assert!(result > EPOCH_BOUNDARY_BUFFER);
    }

    // The sleep never exceeds one full epoch plus the buffer.
    #[test]
    fn test_result_at_most_one_epoch() {
        let config = SlotConfig::preview();
        let max =
            Duration::from_millis(config.epoch_length * config.slot_length) + EPOCH_BOUNDARY_BUFFER;
        let result = duration_until_next_epoch(&config);
        assert!(result <= max, "result {result:?} exceeds max {max:?}");
    }

    // Same bounds hold for the mainnet configuration.
    #[test]
    fn test_mainnet_config() {
        let config = SlotConfig::mainnet();
        let max =
            Duration::from_millis(config.epoch_length * config.slot_length) + EPOCH_BOUNDARY_BUFFER;
        let result = duration_until_next_epoch(&config);
        assert!(result > EPOCH_BOUNDARY_BUFFER);
        assert!(result <= max);
    }
}
Loading
Loading