Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
335 changes: 335 additions & 0 deletions notebooks/10-block-propagation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,335 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "0",
"metadata": {},
"source": [
"Analysis of block propagation latency across Ethereum mainnet.\n",
"\n",
"**Methodology:** Measures time from slot start to when Xatu sentry nodes first observe the block via gossipsub. The \"spread\" shows how long it takes for all sentry nodes to see the block after the fastest node."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.express as px\n",
"import plotly.graph_objects as go\n",
"from plotly.subplots import make_subplots\n",
"\n",
"from loaders import load_parquet, display_sql\n",
"\n",
"target_date = None # Set via papermill, or auto-detect from manifest"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2",
"metadata": {
"tags": [
"sql-fold"
]
},
"outputs": [],
"source": [
"display_sql(\"block_propagation\", target_date)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3",
"metadata": {},
"outputs": [],
"source": [
"df = load_parquet(\"block_propagation\", target_date)\n",
"\n",
"# Fail fast with a readable message: np.average raises a bare ZeroDivisionError\n",
"# when the weights (slot counts) sum to zero, e.g. when no rows were loaded.\n",
"if df.empty or df[\"slot_count\"].sum() == 0:\n",
"    raise ValueError(f\"No block propagation data for target_date={target_date!r}\")\n",
"\n",
"# Convert epoch_start to datetime\n",
"df[\"epoch_start\"] = pd.to_datetime(df[\"epoch_start\"])\n",
"\n",
"# One row per epoch, so the row count is the epoch count\n",
"total_epochs = len(df)\n",
"total_slots = df[\"slot_count\"].sum()\n",
"avg_nodes = df[\"avg_nodes\"].mean()\n",
"\n",
"# Headline figures are slot-count-weighted means of the per-epoch percentiles.\n",
"# They approximate the day-level percentiles; they are not percentiles computed over all slots.\n",
"p50_overall = np.average(df[\"p50_ms\"], weights=df[\"slot_count\"])\n",
"p90_overall = np.average(df[\"p90_ms\"], weights=df[\"slot_count\"])\n",
"p99_overall = np.average(df[\"p99_ms\"], weights=df[\"slot_count\"])\n",
"\n",
"print(f\"Epochs: {total_epochs:,}\")\n",
"print(f\"Slots: {total_slots:,}\")\n",
"print(f\"Average sentry nodes: {avg_nodes:.1f}\")\n",
"print(\"\\nBlock arrival latency (from slot start):\")\n",
"print(f\" p50: {p50_overall:.0f} ms ({p50_overall/1000:.2f} s)\")\n",
"print(f\" p90: {p90_overall:.0f} ms ({p90_overall/1000:.2f} s)\")\n",
"print(f\" p99: {p99_overall:.0f} ms ({p99_overall/1000:.2f} s)\")"
]
},
{
"cell_type": "markdown",
"id": "4",
"metadata": {},
"source": [
"## Block arrival latency over time\n",
"\n",
"Time from slot start to when the block is first observed by any sentry node. Lower is better. Each line represents a percentile of the distribution within each epoch."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5",
"metadata": {},
"outputs": [],
"source": [
"# Line colour per percentile; the key order doubles as the legend order\n",
"colors = {\n",
"    \"P50\": \"#2ecc71\",\n",
"    \"P75\": \"#3498db\",\n",
"    \"P80\": \"#9b59b6\",\n",
"    \"P85\": \"#e67e22\",\n",
"    \"P90\": \"#e74c3c\",\n",
"    \"P95\": \"#c0392b\",\n",
"    \"P99\": \"#7b241c\",\n",
"}\n",
"percentile_order = list(colors)\n",
"percentile_cols = [f\"{label.lower()}_ms\" for label in percentile_order]\n",
"\n",
"# Reshape to long form (one row per epoch and percentile) for the line chart\n",
"df_long = (\n",
"    df.melt(\n",
"        id_vars=[\"epoch\", \"epoch_start\"],\n",
"        value_vars=percentile_cols,\n",
"        var_name=\"percentile\",\n",
"        value_name=\"latency_ms\",\n",
"    )\n",
"    .assign(\n",
"        latency_s=lambda frame: frame[\"latency_ms\"] / 1000,\n",
"        percentile_label=lambda frame: frame[\"percentile\"].str.replace(\"_ms\", \"\").str.upper(),\n",
"    )\n",
")\n",
"\n",
"fig = px.line(\n",
"    df_long,\n",
"    x=\"epoch_start\",\n",
"    y=\"latency_s\",\n",
"    color=\"percentile_label\",\n",
"    color_discrete_map=colors,\n",
"    category_orders={\"percentile_label\": percentile_order},\n",
"    labels={\"epoch_start\": \"Time\", \"latency_s\": \"Latency (seconds)\", \"percentile_label\": \"Percentile\"},\n",
")\n",
"fig.update_traces(line={\"width\": 2})\n",
"fig.update_layout(\n",
"    height=450,\n",
"    margin={\"l\": 60, \"r\": 30, \"t\": 30, \"b\": 60},\n",
"    legend={\"orientation\": \"h\", \"yanchor\": \"bottom\", \"y\": 1.02, \"xanchor\": \"left\", \"x\": 0},\n",
"    xaxis={\"title\": \"Time (UTC)\"},\n",
")\n",
"fig.show(config={\"responsive\": True})"
]
},
{
"cell_type": "markdown",
"id": "6",
"metadata": {},
"source": [
"## Network spread over time\n",
"\n",
"Time between the fastest and slowest sentry node observing each block. This measures how synchronized the network view is. Lower spread means more consistent propagation."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7",
"metadata": {},
"outputs": [],
"source": [
"# Line colour per spread percentile; the key order doubles as the legend order\n",
"spread_colors = {\n",
"    \"P50\": \"#27ae60\",\n",
"    \"P75\": \"#2980b9\",\n",
"    \"P90\": \"#8e44ad\",\n",
"    \"P95\": \"#d35400\",\n",
"    \"P99\": \"#c0392b\",\n",
"}\n",
"spread_order = list(spread_colors)\n",
"spread_cols = [f\"spread_{label.lower()}_ms\" for label in spread_order]\n",
"\n",
"# Reshape to long form (one row per epoch and spread percentile)\n",
"df_spread = (\n",
"    df.melt(\n",
"        id_vars=[\"epoch\", \"epoch_start\"],\n",
"        value_vars=spread_cols,\n",
"        var_name=\"percentile\",\n",
"        value_name=\"spread_ms\",\n",
"    )\n",
"    .assign(\n",
"        spread_s=lambda frame: frame[\"spread_ms\"] / 1000,\n",
"        percentile_label=lambda frame: (\n",
"            frame[\"percentile\"].str.replace(\"spread_\", \"\").str.replace(\"_ms\", \"\").str.upper()\n",
"        ),\n",
"    )\n",
")\n",
"\n",
"fig = px.line(\n",
"    df_spread,\n",
"    x=\"epoch_start\",\n",
"    y=\"spread_s\",\n",
"    color=\"percentile_label\",\n",
"    color_discrete_map=spread_colors,\n",
"    category_orders={\"percentile_label\": spread_order},\n",
"    labels={\"epoch_start\": \"Time\", \"spread_s\": \"Spread (seconds)\", \"percentile_label\": \"Percentile\"},\n",
")\n",
"fig.update_traces(line={\"width\": 2})\n",
"fig.update_layout(\n",
"    height=400,\n",
"    margin={\"l\": 60, \"r\": 30, \"t\": 30, \"b\": 60},\n",
"    legend={\"orientation\": \"h\", \"yanchor\": \"bottom\", \"y\": 1.02, \"xanchor\": \"left\", \"x\": 0},\n",
"    xaxis={\"title\": \"Time (UTC)\"},\n",
")\n",
"fig.show(config={\"responsive\": True})"
]
},
{
"cell_type": "markdown",
"id": "8",
"metadata": {},
"source": [
"## Latency distribution\n",
"\n",
"Distribution of the per-epoch P50 and P99 arrival latencies across all epochs. The histograms show the variability throughout the day."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9",
"metadata": {},
"outputs": [],
"source": [
"# Side-by-side histograms of the per-epoch P50 and P99 arrival latencies\n",
"# Each entry: (source column, trace name, bar colour)\n",
"panels = [\n",
"    (\"p50_ms\", \"P50\", \"#2ecc71\"),\n",
"    (\"p99_ms\", \"P99\", \"#e74c3c\"),\n",
"]\n",
"\n",
"fig = make_subplots(\n",
"    rows=1,\n",
"    cols=2,\n",
"    subplot_titles=(\"P50 latency distribution\", \"P99 latency distribution\"),\n",
")\n",
"\n",
"for col_idx, (column, trace_name, bar_color) in enumerate(panels, start=1):\n",
"    fig.add_trace(\n",
"        go.Histogram(x=df[column], nbinsx=30, marker_color=bar_color, name=trace_name),\n",
"        row=1,\n",
"        col=col_idx,\n",
"    )\n",
"    fig.update_xaxes(title_text=\"Latency (ms)\", row=1, col=col_idx)\n",
"    fig.update_yaxes(title_text=\"Epoch count\", row=1, col=col_idx)\n",
"\n",
"fig.update_layout(\n",
"    height=350,\n",
"    showlegend=False,\n",
"    margin={\"l\": 60, \"r\": 30, \"t\": 60, \"b\": 60},\n",
")\n",
"fig.show(config={\"responsive\": True})"
]
},
{
"cell_type": "markdown",
"id": "10",
"metadata": {},
"source": [
"## Spread vs arrival latency\n",
"\n",
"Relationship between how fast the first node sees a block and how long it takes all nodes to see it. Blocks that arrive late often have larger spread."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11",
"metadata": {},
"outputs": [],
"source": [
"# One point per epoch: P50 arrival latency against P50 spread\n",
"axis_labels = {\"p50_ms\": \"P50 arrival latency (ms)\", \"spread_p50_ms\": \"P50 spread (ms)\"}\n",
"\n",
"fig = px.scatter(\n",
"    df,\n",
"    x=\"p50_ms\",\n",
"    y=\"spread_p50_ms\",\n",
"    labels=axis_labels,\n",
"    hover_data={\"epoch\": True, \"slot_count\": True},\n",
"    opacity=0.6,\n",
")\n",
"fig.update_traces(marker={\"size\": 8, \"color\": \"#3498db\"})\n",
"fig.update_layout(\n",
"    height=400,\n",
"    margin={\"l\": 60, \"r\": 30, \"t\": 30, \"b\": 60},\n",
")\n",
"fig.show(config={\"responsive\": True})"
]
},
{
"cell_type": "markdown",
"id": "12",
"metadata": {},
"source": [
"## Summary statistics\n",
"\n",
"Daily summary of block propagation performance."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13",
"metadata": {},
"outputs": [],
"source": [
"def _slot_weighted(column):\n",
"    \"\"\"Return the slot-count-weighted mean of a per-epoch column of df.\"\"\"\n",
"    return np.average(df[column], weights=df[\"slot_count\"])\n",
"\n",
"\n",
"# (metric label, formatted value) pairs in display order\n",
"summary_rows = [\n",
"    (\"Total epochs\", f\"{total_epochs:,}\"),\n",
"    (\"Total slots\", f\"{total_slots:,}\"),\n",
"    (\"Avg sentry nodes\", f\"{avg_nodes:.1f}\"),\n",
"    (\"P50 arrival (ms)\", f\"{p50_overall:.0f}\"),\n",
"    (\"P75 arrival (ms)\", f\"{_slot_weighted('p75_ms'):.0f}\"),\n",
"    (\"P90 arrival (ms)\", f\"{p90_overall:.0f}\"),\n",
"    (\"P95 arrival (ms)\", f\"{_slot_weighted('p95_ms'):.0f}\"),\n",
"    (\"P99 arrival (ms)\", f\"{p99_overall:.0f}\"),\n",
"    (\"Max arrival (ms)\", f\"{df['max_arrival_ms'].max():.0f}\"),\n",
"    (\"P50 spread (ms)\", f\"{_slot_weighted('spread_p50_ms'):.0f}\"),\n",
"    (\"P90 spread (ms)\", f\"{_slot_weighted('spread_p90_ms'):.0f}\"),\n",
"    (\"P99 spread (ms)\", f\"{_slot_weighted('spread_p99_ms'):.0f}\"),\n",
"    (\"Max spread (ms)\", f\"{df['max_spread_ms'].max():.0f}\"),\n",
"]\n",
"\n",
"# Split the pairs back into the two table columns\n",
"summary = {\n",
"    \"Metric\": [label for label, _ in summary_rows],\n",
"    \"Value\": [value for _, value in summary_rows],\n",
"}\n",
"\n",
"pd.DataFrame(summary)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading
Loading