diff --git a/notebooks/10-block-propagation.ipynb b/notebooks/10-block-propagation.ipynb new file mode 100644 index 0000000..cf67555 --- /dev/null +++ b/notebooks/10-block-propagation.ipynb @@ -0,0 +1,335 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "Analysis of block propagation latency across Ethereum mainnet.\n", + "\n", + "**Methodology:** Measures time from slot start to when Xatu sentry nodes first observe the block via gossipsub. The \"spread\" shows how long it takes for all sentry nodes to see the block after the fastest node." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "from plotly.subplots import make_subplots\n", + "\n", + "from loaders import load_parquet, display_sql\n", + "\n", + "target_date = None # Set via papermill, or auto-detect from manifest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": { + "tags": [ + "sql-fold" + ] + }, + "outputs": [], + "source": [ + "display_sql(\"block_propagation\", target_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "df = load_parquet(\"block_propagation\", target_date)\n", + "\n", + "# Convert epoch_start to datetime\n", + "df[\"epoch_start\"] = pd.to_datetime(df[\"epoch_start\"])\n", + "\n", + "total_epochs = len(df)\n", + "total_slots = df[\"slot_count\"].sum()\n", + "avg_nodes = df[\"avg_nodes\"].mean()\n", + "\n", + "# Overall percentiles (weighted by slot count)\n", + "p50_overall = np.average(df[\"p50_ms\"], weights=df[\"slot_count\"])\n", + "p90_overall = np.average(df[\"p90_ms\"], weights=df[\"slot_count\"])\n", + "p99_overall = np.average(df[\"p99_ms\"], 
weights=df[\"slot_count\"])\n", + "\n", + "print(f\"Epochs: {total_epochs:,}\")\n", + "print(f\"Slots: {total_slots:,}\")\n", + "print(f\"Average sentry nodes: {avg_nodes:.1f}\")\n", + "print(f\"\\nBlock arrival latency (from slot start):\")\n", + "print(f\" p50: {p50_overall:.0f} ms ({p50_overall/1000:.2f} s)\")\n", + "print(f\" p90: {p90_overall:.0f} ms ({p90_overall/1000:.2f} s)\")\n", + "print(f\" p99: {p99_overall:.0f} ms ({p99_overall/1000:.2f} s)\")" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## Block arrival latency over time\n", + "\n", + "Time from slot start to when the block is first observed by any sentry node. Lower is better. Each line represents a percentile of the distribution within each epoch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "# Melt percentile columns for line chart\n", + "percentile_cols = [\"p50_ms\", \"p75_ms\", \"p80_ms\", \"p85_ms\", \"p90_ms\", \"p95_ms\", \"p99_ms\"]\n", + "df_long = df.melt(\n", + " id_vars=[\"epoch\", \"epoch_start\"],\n", + " value_vars=percentile_cols,\n", + " var_name=\"percentile\",\n", + " value_name=\"latency_ms\"\n", + ")\n", + "df_long[\"latency_s\"] = df_long[\"latency_ms\"] / 1000\n", + "df_long[\"percentile_label\"] = df_long[\"percentile\"].str.replace(\"_ms\", \"\").str.upper()\n", + "\n", + "# Color scheme for percentiles (darker = higher percentile)\n", + "colors = {\n", + " \"P50\": \"#2ecc71\",\n", + " \"P75\": \"#3498db\",\n", + " \"P80\": \"#9b59b6\",\n", + " \"P85\": \"#e67e22\",\n", + " \"P90\": \"#e74c3c\",\n", + " \"P95\": \"#c0392b\",\n", + " \"P99\": \"#7b241c\",\n", + "}\n", + "\n", + "fig = px.line(\n", + " df_long,\n", + " x=\"epoch_start\",\n", + " y=\"latency_s\",\n", + " color=\"percentile_label\",\n", + " color_discrete_map=colors,\n", + " labels={\"epoch_start\": \"Time\", \"latency_s\": \"Latency (seconds)\", \"percentile_label\": 
\"Percentile\"},\n", + " category_orders={\"percentile_label\": [\"P50\", \"P75\", \"P80\", \"P85\", \"P90\", \"P95\", \"P99\"]},\n", + ")\n", + "fig.update_traces(line=dict(width=2))\n", + "fig.update_layout(\n", + " margin=dict(l=60, r=30, t=30, b=60),\n", + " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"left\", x=0),\n", + " height=450,\n", + " xaxis=dict(title=\"Time (UTC)\"),\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "## Network spread over time\n", + "\n", + "Time between the fastest and slowest sentry node observing each block. This measures how synchronized the network view is. Lower spread means more consistent propagation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "# Spread percentiles\n", + "spread_cols = [\"spread_p50_ms\", \"spread_p75_ms\", \"spread_p90_ms\", \"spread_p95_ms\", \"spread_p99_ms\"]\n", + "df_spread = df.melt(\n", + " id_vars=[\"epoch\", \"epoch_start\"],\n", + " value_vars=spread_cols,\n", + " var_name=\"percentile\",\n", + " value_name=\"spread_ms\"\n", + ")\n", + "df_spread[\"spread_s\"] = df_spread[\"spread_ms\"] / 1000\n", + "df_spread[\"percentile_label\"] = df_spread[\"percentile\"].str.replace(\"spread_\", \"\").str.replace(\"_ms\", \"\").str.upper()\n", + "\n", + "spread_colors = {\n", + " \"P50\": \"#27ae60\",\n", + " \"P75\": \"#2980b9\",\n", + " \"P90\": \"#8e44ad\",\n", + " \"P95\": \"#d35400\",\n", + " \"P99\": \"#c0392b\",\n", + "}\n", + "\n", + "fig = px.line(\n", + " df_spread,\n", + " x=\"epoch_start\",\n", + " y=\"spread_s\",\n", + " color=\"percentile_label\",\n", + " color_discrete_map=spread_colors,\n", + " labels={\"epoch_start\": \"Time\", \"spread_s\": \"Spread (seconds)\", \"percentile_label\": \"Percentile\"},\n", + " category_orders={\"percentile_label\": [\"P50\", \"P75\", \"P90\", \"P95\", 
\"P99\"]},\n", + ")\n", + "fig.update_traces(line=dict(width=2))\n", + "fig.update_layout(\n", + " margin=dict(l=60, r=30, t=30, b=60),\n", + " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"left\", x=0),\n", + " height=400,\n", + " xaxis=dict(title=\"Time (UTC)\"),\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "## Latency distribution\n", + "\n", + "Distribution of p50 arrival latencies across all epochs. The box plot shows the variability throughout the day." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "# Create histogram of p50 latencies\n", + "fig = make_subplots(rows=1, cols=2, subplot_titles=(\"P50 latency distribution\", \"P99 latency distribution\"))\n", + "\n", + "fig.add_trace(\n", + " go.Histogram(x=df[\"p50_ms\"], nbinsx=30, marker_color=\"#2ecc71\", name=\"P50\"),\n", + " row=1, col=1\n", + ")\n", + "fig.add_trace(\n", + " go.Histogram(x=df[\"p99_ms\"], nbinsx=30, marker_color=\"#e74c3c\", name=\"P99\"),\n", + " row=1, col=2\n", + ")\n", + "\n", + "fig.update_xaxes(title_text=\"Latency (ms)\", row=1, col=1)\n", + "fig.update_xaxes(title_text=\"Latency (ms)\", row=1, col=2)\n", + "fig.update_yaxes(title_text=\"Epoch count\", row=1, col=1)\n", + "fig.update_yaxes(title_text=\"Epoch count\", row=1, col=2)\n", + "\n", + "fig.update_layout(\n", + " margin=dict(l=60, r=30, t=60, b=60),\n", + " showlegend=False,\n", + " height=350,\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "## Spread vs arrival latency\n", + "\n", + "Relationship between how fast the first node sees a block and how long it takes all nodes to see it. Blocks that arrive late often have larger spread." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "fig = px.scatter(\n", + " df,\n", + " x=\"p50_ms\",\n", + " y=\"spread_p50_ms\",\n", + " opacity=0.6,\n", + " labels={\"p50_ms\": \"P50 arrival latency (ms)\", \"spread_p50_ms\": \"P50 spread (ms)\"},\n", + " hover_data={\"epoch\": True, \"slot_count\": True},\n", + ")\n", + "fig.update_traces(marker=dict(size=8, color=\"#3498db\"))\n", + "fig.update_layout(\n", + " margin=dict(l=60, r=30, t=30, b=60),\n", + " height=400,\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, + "source": [ + "## Summary statistics\n", + "\n", + "Daily summary of block propagation performance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "# Summary table\n", + "summary = {\n", + " \"Metric\": [\n", + " \"Total epochs\",\n", + " \"Total slots\",\n", + " \"Avg sentry nodes\",\n", + " \"P50 arrival (ms)\",\n", + " \"P75 arrival (ms)\",\n", + " \"P90 arrival (ms)\",\n", + " \"P95 arrival (ms)\",\n", + " \"P99 arrival (ms)\",\n", + " \"Max arrival (ms)\",\n", + " \"P50 spread (ms)\",\n", + " \"P90 spread (ms)\",\n", + " \"P99 spread (ms)\",\n", + " \"Max spread (ms)\",\n", + " ],\n", + " \"Value\": [\n", + " f\"{total_epochs:,}\",\n", + " f\"{total_slots:,}\",\n", + " f\"{avg_nodes:.1f}\",\n", + " f\"{p50_overall:.0f}\",\n", + " f\"{np.average(df['p75_ms'], weights=df['slot_count']):.0f}\",\n", + " f\"{p90_overall:.0f}\",\n", + " f\"{np.average(df['p95_ms'], weights=df['slot_count']):.0f}\",\n", + " f\"{p99_overall:.0f}\",\n", + " f\"{df['max_arrival_ms'].max():.0f}\",\n", + " f\"{np.average(df['spread_p50_ms'], weights=df['slot_count']):.0f}\",\n", + " f\"{np.average(df['spread_p90_ms'], weights=df['slot_count']):.0f}\",\n", + " f\"{np.average(df['spread_p99_ms'], weights=df['slot_count']):.0f}\",\n", + " 
f\"{df['max_spread_ms'].max():.0f}\",\n", + " ],\n", + "}\n", + "\n", + "pd.DataFrame(summary)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/11-attestation-arrival.ipynb b/notebooks/11-attestation-arrival.ipynb new file mode 100644 index 0000000..5958c21 --- /dev/null +++ b/notebooks/11-attestation-arrival.ipynb @@ -0,0 +1,405 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "Analysis of attestation arrival latency across Ethereum mainnet.\n", + "\n", + "**Methodology:** Measures time from slot start to when Xatu sentry nodes first observe attestations via gossipsub. Attestations are broadcast on 64 subnets (committees). Late attestations (>4s) may miss inclusion in the next block." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "from plotly.subplots import make_subplots\n", + "\n", + "from loaders import load_parquet, display_sql\n", + "\n", + "target_date = None # Set via papermill, or auto-detect from manifest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": { + "tags": [ + "sql-fold" + ] + }, + "outputs": [], + "source": [ + "display_sql(\"attestation_propagation\", target_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "df = load_parquet(\"attestation_propagation\", target_date)\n", + "\n", + "# Convert hour to datetime\n", + "df[\"hour\"] = pd.to_datetime(df[\"hour\"])\n", + "\n", + "total_hours = len(df)\n", + "total_attestations = 
df[\"attestation_count\"].sum()\n", + "total_slots = df[\"slot_count\"].sum()\n", + "\n", + "# Overall percentiles (weighted by attestation count)\n", + "p50_overall = np.average(df[\"p50_ms\"], weights=df[\"attestation_count\"])\n", + "p90_overall = np.average(df[\"p90_ms\"], weights=df[\"attestation_count\"])\n", + "p99_overall = np.average(df[\"p99_ms\"], weights=df[\"attestation_count\"])\n", + "\n", + "print(f\"Hours: {total_hours:,}\")\n", + "print(f\"Slots: {total_slots:,}\")\n", + "print(f\"Attestations observed: {total_attestations:,}\")\n", + "print(f\"\\nAttestation arrival latency (from slot start):\")\n", + "print(f\" p50: {p50_overall:.0f} ms ({p50_overall/1000:.2f} s)\")\n", + "print(f\" p90: {p90_overall:.0f} ms ({p90_overall/1000:.2f} s)\")\n", + "print(f\" p99: {p99_overall:.0f} ms ({p99_overall/1000:.2f} s)\")" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## Attestation arrival latency over time\n", + "\n", + "Time from slot start to when attestations are first observed. Each line represents a percentile of the distribution within each hour. Attestations arriving after 4 seconds (dashed line) may miss the next block." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "# Melt percentile columns for line chart\n", + "percentile_cols = [\"p50_ms\", \"p75_ms\", \"p80_ms\", \"p85_ms\", \"p90_ms\", \"p95_ms\", \"p99_ms\"]\n", + "df_long = df.melt(\n", + " id_vars=[\"hour\"],\n", + " value_vars=percentile_cols,\n", + " var_name=\"percentile\",\n", + " value_name=\"latency_ms\"\n", + ")\n", + "df_long[\"latency_s\"] = df_long[\"latency_ms\"] / 1000\n", + "df_long[\"percentile_label\"] = df_long[\"percentile\"].str.replace(\"_ms\", \"\").str.upper()\n", + "\n", + "# Color scheme for percentiles\n", + "colors = {\n", + " \"P50\": \"#2ecc71\",\n", + " \"P75\": \"#3498db\",\n", + " \"P80\": \"#9b59b6\",\n", + " \"P85\": \"#e67e22\",\n", + " \"P90\": \"#e74c3c\",\n", + " \"P95\": \"#c0392b\",\n", + " \"P99\": \"#7b241c\",\n", + "}\n", + "\n", + "fig = px.line(\n", + " df_long,\n", + " x=\"hour\",\n", + " y=\"latency_s\",\n", + " color=\"percentile_label\",\n", + " color_discrete_map=colors,\n", + " labels={\"hour\": \"Time\", \"latency_s\": \"Latency (seconds)\", \"percentile_label\": \"Percentile\"},\n", + " category_orders={\"percentile_label\": [\"P50\", \"P75\", \"P80\", \"P85\", \"P90\", \"P95\", \"P99\"]},\n", + ")\n", + "fig.update_traces(line=dict(width=2))\n", + "\n", + "# Add 4-second reference line (attestation deadline)\n", + "fig.add_hline(y=4, line_dash=\"dash\", line_color=\"gray\", annotation_text=\"4s deadline\", annotation_position=\"right\")\n", + "\n", + "fig.update_layout(\n", + " margin=dict(l=60, r=30, t=30, b=60),\n", + " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"left\", x=0),\n", + " height=450,\n", + " xaxis=dict(title=\"Time (UTC)\"),\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "## Latency distribution\n", + "\n", + "Distribution of per-hour median (P50) and tail 
(P99) attestation latencies throughout the day." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "# Create histogram of latencies\n", + "fig = make_subplots(rows=1, cols=2, subplot_titles=(\"P50 latency distribution\", \"P99 latency distribution\"))\n", + "\n", + "fig.add_trace(\n", + " go.Histogram(x=df[\"p50_ms\"] / 1000, nbinsx=30, marker_color=\"#2ecc71\", name=\"P50\"),\n", + " row=1, col=1\n", + ")\n", + "fig.add_trace(\n", + " go.Histogram(x=df[\"p99_ms\"] / 1000, nbinsx=30, marker_color=\"#e74c3c\", name=\"P99\"),\n", + " row=1, col=2\n", + ")\n", + "\n", + "fig.update_xaxes(title_text=\"Latency (s)\", row=1, col=1)\n", + "fig.update_xaxes(title_text=\"Latency (s)\", row=1, col=2)\n", + "fig.update_yaxes(title_text=\"Hour count\", row=1, col=1)\n", + "fig.update_yaxes(title_text=\"Hour count\", row=1, col=2)\n", + "\n", + "fig.update_layout(\n", + " margin=dict(l=60, r=30, t=60, b=60),\n", + " showlegend=False,\n", + " height=350,\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "## Committee (subnet) analysis\n", + "\n", + "Per-committee latency summary for the day. Committees with significantly higher latency may indicate subnet-specific issues." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": { + "tags": [ + "sql-fold" + ] + }, + "outputs": [], + "source": [ + "display_sql(\"attestation_by_committee\", target_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "df_comm = load_parquet(\"attestation_by_committee\", target_date)\n", + "\n", + "# Convert committee to int for proper sorting\n", + "df_comm[\"committee_num\"] = pd.to_numeric(df_comm[\"committee\"], errors=\"coerce\").fillna(-1).astype(int)\n", + "df_comm = df_comm.sort_values(\"committee_num\")\n", + "\n", + "# Calculate overall median for comparison\n", + "overall_p50 = df_comm[\"p50_ms\"].median()\n", + "overall_p99 = df_comm[\"p99_ms\"].median()\n", + "\n", + "print(f\"Committees: {len(df_comm)}\")\n", + "print(f\"Total attestations: {df_comm['attestation_count'].sum():,}\")\n", + "print(f\"\\nCross-committee medians:\")\n", + "print(f\" P50: {overall_p50:.0f} ms\")\n", + "print(f\" P99: {overall_p99:.0f} ms\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "# Bar chart of P50 and P99 by committee\n", + "fig = go.Figure()\n", + "\n", + "fig.add_trace(go.Bar(\n", + " x=df_comm[\"committee\"],\n", + " y=df_comm[\"p50_ms\"] / 1000,\n", + " name=\"P50\",\n", + " marker_color=\"#2ecc71\",\n", + "))\n", + "fig.add_trace(go.Bar(\n", + " x=df_comm[\"committee\"],\n", + " y=df_comm[\"p99_ms\"] / 1000,\n", + " name=\"P99\",\n", + " marker_color=\"#e74c3c\",\n", + "))\n", + "\n", + "# Add reference lines\n", + "fig.add_hline(y=4, line_dash=\"dash\", line_color=\"gray\", annotation_text=\"4s deadline\")\n", + "\n", + "fig.update_layout(\n", + " barmode=\"group\",\n", + " margin=dict(l=60, r=30, t=30, b=60),\n", + " xaxis=dict(title=\"Committee (subnet)\", tickangle=45, dtick=4),\n", + " yaxis=dict(title=\"Latency (seconds)\"),\n", + " 
legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"left\", x=0),\n", + " height=400,\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, + "source": [ + "## Slow committees\n", + "\n", + "Committees with P99 latency significantly above the median. These subnets may have connectivity or performance issues." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "# Identify slow committees (P99 > 1.5x median P99)\n", + "threshold = overall_p99 * 1.5\n", + "df_slow = df_comm[df_comm[\"p99_ms\"] > threshold].copy()\n", + "df_slow = df_slow.sort_values(\"p99_ms\", ascending=False)\n", + "\n", + "if len(df_slow) > 0:\n", + " print(f\"Committees with P99 > {threshold:.0f} ms ({threshold/1000:.1f}s):\")\n", + " display_df = df_slow[[\"committee\", \"attestation_count\", \"p50_ms\", \"p90_ms\", \"p99_ms\", \"max_ms\"]].copy()\n", + " display_df.columns = [\"Committee\", \"Attestations\", \"P50 (ms)\", \"P90 (ms)\", \"P99 (ms)\", \"Max (ms)\"]\n", + " for col in [\"P50 (ms)\", \"P90 (ms)\", \"P99 (ms)\", \"Max (ms)\"]:\n", + " display_df[col] = display_df[col].round(0).astype(int)\n", + " display(display_df)\n", + "else:\n", + " print(f\"No committees with P99 > {threshold:.0f} ms - all subnets performing within normal range.\")" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": {}, + "source": [ + "## Committee latency heatmap\n", + "\n", + "Visual comparison of latency percentiles across all 64 committees." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "# Prepare heatmap data\n", + "heatmap_cols = [\"p50_ms\", \"p75_ms\", \"p90_ms\", \"p95_ms\", \"p99_ms\"]\n", + "heatmap_data = df_comm.set_index(\"committee\")[heatmap_cols].T\n", + "heatmap_data.index = [\"P50\", \"P75\", \"P90\", \"P95\", \"P99\"]\n", + "\n", + "# Convert to seconds for display\n", + "heatmap_data = heatmap_data / 1000\n", + "\n", + "fig = go.Figure(data=go.Heatmap(\n", + " z=heatmap_data.values,\n", + " x=heatmap_data.columns,\n", + " y=heatmap_data.index,\n", + " colorscale=\"YlOrRd\",\n", + " colorbar=dict(title=\"Latency (s)\"),\n", + "))\n", + "\n", + "fig.update_layout(\n", + " margin=dict(l=60, r=30, t=30, b=60),\n", + " xaxis=dict(title=\"Committee\", tickangle=45, dtick=4),\n", + " yaxis=dict(title=\"Percentile\", autorange=\"reversed\"),\n", + " height=300,\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "## Summary statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "# Summary table\n", + "summary = {\n", + " \"Metric\": [\n", + " \"Total hours\",\n", + " \"Total slots\",\n", + " \"Attestations observed\",\n", + " \"Committees (subnets)\",\n", + " \"P50 arrival (ms)\",\n", + " \"P75 arrival (ms)\",\n", + " \"P90 arrival (ms)\",\n", + " \"P95 arrival (ms)\",\n", + " \"P99 arrival (ms)\",\n", + " \"Max arrival (ms)\",\n", + " \"Slow committees (P99 > 1.5x median)\",\n", + " ],\n", + " \"Value\": [\n", + " f\"{total_hours:,}\",\n", + " f\"{total_slots:,}\",\n", + " f\"{total_attestations:,}\",\n", + " f\"{len(df_comm)}\",\n", + " f\"{p50_overall:.0f}\",\n", + " f\"{np.average(df['p75_ms'], weights=df['attestation_count']):.0f}\",\n", + " f\"{p90_overall:.0f}\",\n", + " f\"{np.average(df['p95_ms'], 
weights=df['attestation_count']):.0f}\",\n", + " f\"{p99_overall:.0f}\",\n", + " f\"{df['max_ms'].max():.0f}\",\n", + " f\"{len(df_slow)}\",\n", + " ],\n", + "}\n", + "\n", + "pd.DataFrame(summary)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pipeline.yaml b/pipeline.yaml index 99d1dc2..53c7124 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -100,6 +100,24 @@ queries: description: End-to-end block production timing from MEV bidding to column arrival output_file: block_production_timeline.parquet + block_propagation: + module: queries.block_propagation + function: fetch_block_propagation + description: Block propagation latency percentiles by epoch + output_file: block_propagation.parquet + + attestation_propagation: + module: queries.attestation_propagation + function: fetch_attestation_propagation + description: Attestation arrival latency percentiles by hour + output_file: attestation_propagation.parquet + + attestation_by_committee: + module: queries.attestation_propagation + function: fetch_attestation_by_committee + description: Attestation latency summary by committee (subnet) + output_file: attestation_by_committee.parquet + # ============================================ # Notebook Registry # ============================================ @@ -222,6 +240,35 @@ notebooks: required: true order: 8 + - id: block-propagation + title: Block propagation + description: Block arrival latency from slot start measured across sentry nodes + icon: Radio + source: notebooks/10-block-propagation.ipynb + schedule: daily + queries: + - block_propagation + parameters: + - name: target_date + type: date + required: true + order: 10 + + - id: attestation-arrival + title: Attestation arrival + description: Attestation propagation latency across 64 committee subnets + icon: 
Users + source: notebooks/11-attestation-arrival.ipynb + schedule: daily + queries: + - attestation_propagation + - attestation_by_committee + parameters: + - name: target_date + type: date + required: true + order: 11 + # Schedule options: hourly, daily, weekly, manual # - hourly: Runs every hour, accumulating data throughout the day # - daily: Runs once per day at 1am UTC diff --git a/queries/attestation_propagation.py b/queries/attestation_propagation.py new file mode 100644 index 0000000..dccb335 --- /dev/null +++ b/queries/attestation_propagation.py @@ -0,0 +1,105 @@ +""" +Fetch functions for attestation arrival latency analysis. + +Each function executes SQL and returns the DataFrame and query string. +""" + + +def _get_date_filter(target_date: str, column: str = "slot_start_date_time") -> str: + """Generate SQL date filter for a specific date.""" + return f"{column} >= '{target_date}' AND {column} < '{target_date}'::date + INTERVAL 1 DAY" + + +def fetch_attestation_propagation( + client, + target_date: str, + network: str = "mainnet", +) -> tuple: + """Fetch attestation arrival latency data aggregated by hour. + + Returns per-hour percentiles (p50, p75, p80, p85, p90, p95, p99) of + attestation arrival times across all committees (subnets). + + Uses hourly aggregation for efficient processing of massive attestation data. + + Returns (df, query). 
+ """ + date_filter = _get_date_filter(target_date) + + query = f""" +SELECT + toStartOfHour(slot_start_date_time) AS hour, + COUNT(*) AS attestation_count, + uniqExact(slot) AS slot_count, + + -- Arrival time percentiles (using propagation_slot_start_diff directly) + quantile(0.50)(propagation_slot_start_diff) AS p50_ms, + quantile(0.75)(propagation_slot_start_diff) AS p75_ms, + quantile(0.80)(propagation_slot_start_diff) AS p80_ms, + quantile(0.85)(propagation_slot_start_diff) AS p85_ms, + quantile(0.90)(propagation_slot_start_diff) AS p90_ms, + quantile(0.95)(propagation_slot_start_diff) AS p95_ms, + quantile(0.99)(propagation_slot_start_diff) AS p99_ms, + + -- Distribution stats + AVG(propagation_slot_start_diff) AS avg_ms, + stddevSamp(propagation_slot_start_diff) AS std_ms, + MIN(propagation_slot_start_diff) AS min_ms, + MAX(propagation_slot_start_diff) AS max_ms + +FROM libp2p_gossipsub_beacon_attestation +WHERE {date_filter} + AND meta_network_name = '{network}' + AND startsWith(meta_client_name, 'ethpandaops/{network}/') +GROUP BY hour +ORDER BY hour +""" + + df = client.query_df(query) + return df, query + + +def fetch_attestation_by_committee( + client, + target_date: str, + network: str = "mainnet", +) -> tuple: + """Fetch attestation arrival latency summarized by committee (subnet). + + Returns per-committee percentiles for the entire day, useful for + identifying slow or problematic subnets. + + Aggregates the full day of attestations per committee (no sampling is applied). + + Returns (df, query). 
+ """ + date_filter = _get_date_filter(target_date) + + query = f""" +SELECT + attesting_validator_committee_index AS committee, + COUNT(*) AS attestation_count, + uniqExact(slot) AS slot_count, + + -- Arrival time percentiles + quantile(0.50)(propagation_slot_start_diff) AS p50_ms, + quantile(0.75)(propagation_slot_start_diff) AS p75_ms, + quantile(0.90)(propagation_slot_start_diff) AS p90_ms, + quantile(0.95)(propagation_slot_start_diff) AS p95_ms, + quantile(0.99)(propagation_slot_start_diff) AS p99_ms, + + -- Stats for comparison + AVG(propagation_slot_start_diff) AS avg_ms, + MAX(propagation_slot_start_diff) AS max_ms + +FROM libp2p_gossipsub_beacon_attestation +WHERE {date_filter} + AND meta_network_name = '{network}' + AND startsWith(meta_client_name, 'ethpandaops/{network}/') + AND attesting_validator_committee_index != '' +GROUP BY attesting_validator_committee_index +ORDER BY toInt32OrNull(attesting_validator_committee_index) +""" + + df = client.query_df(query) + return df, query diff --git a/queries/block_propagation.py b/queries/block_propagation.py new file mode 100644 index 0000000..916499d --- /dev/null +++ b/queries/block_propagation.py @@ -0,0 +1,87 @@ +""" +Fetch functions for block propagation latency analysis. + +Each function executes SQL and returns the DataFrame and query string. +""" + + +def _get_date_filter(target_date: str, column: str = "slot_start_date_time") -> str: + """Generate SQL date filter for a specific date.""" + return f"{column} >= '{target_date}' AND {column} < '{target_date}'::date + INTERVAL 1 DAY" + + +def fetch_block_propagation( + client, + target_date: str, + network: str = "mainnet", +) -> tuple: + """Fetch block propagation latency data aggregated by epoch. + + Returns per-epoch percentiles (p50, p75, p80, p85, p90, p95, p99) of block + arrival times, plus spread metrics (max-min across sentry nodes per slot). + + Returns (df, query). 
+ """ + date_filter = _get_date_filter(target_date) + + query = f""" +WITH first_seen_per_node AS ( + -- Get first observation of each block per sentry node + SELECT + slot, + slot_start_date_time, + epoch, + meta_client_name AS node_name, + MIN(propagation_slot_start_diff) AS arrival_ms + FROM libp2p_gossipsub_beacon_block FINAL + WHERE {date_filter} + AND meta_network_name = '{network}' + AND startsWith(meta_client_name, 'ethpandaops/{network}/') + GROUP BY slot, slot_start_date_time, epoch, meta_client_name +), +slot_stats AS ( + -- Per-slot: first arrival (fastest node) and spread (slowest - fastest) + SELECT + slot, + slot_start_date_time, + epoch, + MIN(arrival_ms) AS first_arrival_ms, + MAX(arrival_ms) AS last_arrival_ms, + MAX(arrival_ms) - MIN(arrival_ms) AS spread_ms, + COUNT(DISTINCT node_name) AS node_count + FROM first_seen_per_node + GROUP BY slot, slot_start_date_time, epoch +) +SELECT + epoch, + MIN(slot_start_date_time) AS epoch_start, + COUNT(*) AS slot_count, + AVG(node_count) AS avg_nodes, + + -- Arrival time percentiles (time from slot start to first observation) + quantile(0.50)(first_arrival_ms) AS p50_ms, + quantile(0.75)(first_arrival_ms) AS p75_ms, + quantile(0.80)(first_arrival_ms) AS p80_ms, + quantile(0.85)(first_arrival_ms) AS p85_ms, + quantile(0.90)(first_arrival_ms) AS p90_ms, + quantile(0.95)(first_arrival_ms) AS p95_ms, + quantile(0.99)(first_arrival_ms) AS p99_ms, + + -- Spread percentiles (time for all nodes to see the block) + quantile(0.50)(spread_ms) AS spread_p50_ms, + quantile(0.75)(spread_ms) AS spread_p75_ms, + quantile(0.90)(spread_ms) AS spread_p90_ms, + quantile(0.95)(spread_ms) AS spread_p95_ms, + quantile(0.99)(spread_ms) AS spread_p99_ms, + + -- Tail analysis + MAX(first_arrival_ms) AS max_arrival_ms, + MAX(spread_ms) AS max_spread_ms + +FROM slot_stats +GROUP BY epoch +ORDER BY epoch +""" + + df = client.query_df(query) + return df, query diff --git a/site/src/components/Icon.astro b/site/src/components/Icon.astro 
index 5b8b505..617c349 100644 --- a/site/src/components/Icon.astro +++ b/site/src/components/Icon.astro @@ -1,5 +1,5 @@ --- -import { Activity, AlertCircle, AlertTriangle, Calendar, ChevronLeft, ChevronRight, Eye, FileText, Gavel, Grid3x3, Layers, Link, List, PanelLeft, Timer, XCircle } from 'lucide-react'; +import { Activity, AlertCircle, AlertTriangle, Calendar, ChevronLeft, ChevronRight, Eye, FileText, Gavel, Grid3x3, Layers, Link, List, PanelLeft, Radio, Timer, Users, XCircle } from 'lucide-react'; interface Props { name: string; @@ -27,7 +27,9 @@ const icons: Record = { Link, List, PanelLeft, + Radio, Timer, + Users, XCircle, };