Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
331 changes: 331 additions & 0 deletions notebooks/12-slot-classifier.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,331 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "0",
"metadata": {},
"source": [
"Analysis of Ethereum CL slots, classified with tags derived from networking events (block proposal and propagation, data columns, attestations and their inclusion, aggregations)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {
"jupyter": {
"is_executing": true
}
},
"outputs": [],
"source": [
"import polars as pl\n",
"from loaders import load_parquet\n",
"from IPython.display import display\n",
"\n",
"import plotly.express as px\n",
"import plotly.graph_objects as go\n",
"from plotly.subplots import make_subplots\n",
"\n",
"from queries.slot_tags import TAG_ORDERS, TAG_LABELS, TAG_GROUPS, short_label\n",
"\n",
"# Global Variables\n",
"target_date = None # Use this as a default for the automation and the rendering of the page"
]
},
{
"cell_type": "markdown",
"id": "2",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "3",
"metadata": {},
"outputs": [],
"source": [
"# read the parquet file\n",
"df = pl.from_pandas(load_parquet(\"slot_tags\", target_date=target_date))"
]
},
{
"cell_type": "markdown",
"id": "4",
"metadata": {},
"source": [
"# Tag Distribution Overview — blocks, attestations, aggregations"
]
},
{
"cell_type": "markdown",
"id": "5",
"metadata": {},
"source": [
"## Tag Distribution Overview\n",
"\n",
"Percentage of slots with each tag value, broken down per dimension and grouped by category (blocks, attestations, aggregations)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6",
"metadata": {},
"outputs": [],
"source": [
"total = len(df)\n",
"\n",
"\n",
"def plot_tag_distribution(tag_cols: list[str], title: str) -> None:\n",
"    \"\"\"Show one bar chart per tag column with the share of slots for each tag value.\n",
"\n",
"    Reads the notebook-level `df` and `total` (row count of `df`). Bars are ordered\n",
"    by TAG_ORDERS, restricted to the values actually present in `df`.\n",
"\n",
"    Args:\n",
"        tag_cols: Tag columns to plot; each gets its own subplot row.\n",
"        title: Title of the whole figure.\n",
"    \"\"\"\n",
"    n = len(tag_cols)\n",
"    row_height = 300\n",
"    fig = make_subplots(\n",
"        rows=n,\n",
"        cols=1,\n",
"        subplot_titles=[TAG_LABELS[c] for c in tag_cols],\n",
"        vertical_spacing=80 / (row_height * n),  # fixed 80px gap between subplots\n",
"    )\n",
"    for i, col in enumerate(tag_cols, 1):\n",
"        present = set(df[col].unique().to_list())\n",
"        order = [v for v in TAG_ORDERS[col] if v in present]\n",
"        counts = (\n",
"            df\n",
"            .group_by(col)\n",
"            .agg(count=pl.len())\n",
"            .with_columns(pct=(pl.col(\"count\") * 100 / total))\n",
"            # Enum cast makes the sort follow TAG_ORDERS instead of alphabetical order.\n",
"            .with_columns(pl.col(col).cast(pl.Enum(order)))\n",
"            .sort(col)\n",
"        )\n",
"        pct_values = counts[\"pct\"].to_list()\n",
"        fig.add_trace(\n",
"            go.Bar(\n",
"                x=[short_label(v) for v in counts[col].to_list()],\n",
"                y=pct_values,\n",
"                text=[f\"{v:.1f}%\" for v in pct_values],\n",
"                textposition=\"outside\",\n",
"                showlegend=False,\n",
"                marker_color=\"#6366f1\",\n",
"            ),\n",
"            row=i,\n",
"            col=1,\n",
"        )\n",
"        # Extend y-axis range so outside labels stay inside the plot area,\n",
"        # preventing the hover tooltip from disappearing when the cursor is over them.\n",
"        # With no rows there is nothing to scale to (max() of an empty list raises),\n",
"        # so the range is left to plotly's default in that case.\n",
"        yaxis_kwargs = {\"title_text\": \"% slots\"}\n",
"        if pct_values:\n",
"            yaxis_kwargs[\"range\"] = [0, max(pct_values) * 1.25]\n",
"        fig.update_yaxes(row=i, col=1, **yaxis_kwargs)\n",
"    fig.update_layout(\n",
"        title=title,\n",
"        height=row_height * n,\n",
"        width=1000,\n",
"        margin=dict(t=80, b=40),\n",
"        hovermode=\"x\",\n",
"    )\n",
"    fig.show()\n",
"\n",
"\n",
"# One figure per tag group defined in TAG_GROUPS.\n",
"for group_name, cols in TAG_GROUPS.items():\n",
"    plot_tag_distribution(cols, f\"Slot Tag Distributions — {group_name}\")"
]
},
{
"cell_type": "markdown",
"id": "7",
"metadata": {},
"source": [
"## Cross-Dimension Co-occurrence Heatmaps\n",
"\n",
"For each pair of dimensions, cells show the percentage of slots in row-category X that also have column-category Y. Rows sum to 100%."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8",
"metadata": {},
"outputs": [],
"source": [
"def cross_tab_heatmap(df: pl.DataFrame, col_x: str, col_y: str) -> go.Figure:\n",
"    \"\"\"Row-normalized cross-tab: for each value of col_x, % breakdown by col_y.\n",
"\n",
"    Rows and columns follow TAG_ORDERS, restricted to values present in `df`.\n",
"    Percentages are computed against all slots sharing the col_x value, so a row\n",
"    sums to 100% only when every col_y value it contains is listed in TAG_ORDERS\n",
"    (and up to the 1-decimal rounding applied to each cell).\n",
"\n",
"    Args:\n",
"        df: Frame holding both tag columns.\n",
"        col_x: Tag column shown on the heatmap rows (y-axis).\n",
"        col_y: Tag column shown on the heatmap columns (x-axis).\n",
"\n",
"    Returns:\n",
"        A plotly Figure containing a single Heatmap trace.\n",
"    \"\"\"\n",
"    present_x = set(df[col_x].unique().to_list())\n",
"    present_y = set(df[col_y].unique().to_list())\n",
"    order_x = [v for v in TAG_ORDERS[col_x] if v in present_x]\n",
"    order_y = [v for v in TAG_ORDERS[col_y] if v in present_y]\n",
"\n",
"    ct = df.group_by([col_x, col_y]).agg(count=pl.len())\n",
"    totals = ct.group_by(col_x).agg(total=pl.col(\"count\").sum())\n",
"    ct = ct.join(totals, on=col_x).with_columns(pct=(pl.col(\"count\") * 100 / pl.col(\"total\")))\n",
"\n",
"    # Index the percentages by (x, y) once instead of filtering the frame per cell.\n",
"    pct_by_pair = {(r[col_x], r[col_y]): r[\"pct\"] for r in ct.iter_rows(named=True)}\n",
"    # Combinations that never occur are shown as 0.0.\n",
"    matrix = [\n",
"        [round(pct_by_pair[(x, y)], 1) if (x, y) in pct_by_pair else 0.0 for y in order_y]\n",
"        for x in order_x\n",
"    ]\n",
"\n",
"    fig = go.Figure(go.Heatmap(\n",
"        z=matrix,\n",
"        x=[short_label(v) for v in order_y],\n",
"        y=[short_label(v) for v in order_x],\n",
"        colorscale=\"Blues\",\n",
"        zmin=0,\n",
"        zmax=100,\n",
"        text=[[f\"{v:.1f}%\" for v in row] for row in matrix],\n",
"        texttemplate=\"%{text}\",\n",
"        hoverongaps=False,\n",
"        colorbar=dict(title=\"% of row\"),\n",
"    ))\n",
"    fig.update_layout(\n",
"        title=f\"{TAG_LABELS[col_x]} vs {TAG_LABELS[col_y]}\",\n",
"        xaxis_title=TAG_LABELS[col_y],\n",
"        yaxis_title=TAG_LABELS[col_x],\n",
"        width=900,\n",
"        height=350,\n",
"        margin=dict(l=200, b=140),\n",
"    )\n",
"    return fig\n",
"\n",
"\n",
"# (row dimension, column dimension) pairs, rendered in this order.\n",
"for col_x, col_y in [\n",
"    # Blocks\n",
"    (\"block_proposal_tag\", \"block_p50_spread_tag\"),\n",
"    (\"block_size_tag\", \"block_p50_spread_tag\"),\n",
"    (\"blob_count_tag\", \"block_p50_spread_tag\"),\n",
"    (\"block_proposal_tag\", \"block_p50_arrival_tag\"),\n",
"    # Data columns\n",
"    (\"block_proposal_tag\", \"col_first_seen_p50_tag\"),\n",
"    (\"blob_count_tag\", \"col_first_seen_p50_tag\"),\n",
"    (\"col_first_seen_p50_tag\", \"col_spread_p50_tag\"),\n",
"    # Attestations\n",
"    (\"block_proposal_tag\", \"att_first_seen_p50_tag\"),\n",
"    (\"att_first_seen_p50_tag\", \"att_spread_p50_tag\"),\n",
"    (\"att_first_seen_p50_tag\", \"att_inclusion_p50_tag\"),\n",
"    (\"att_spread_p50_tag\", \"att_inclusion_p50_tag\"),\n",
"    # Aggregations\n",
"    (\"block_proposal_tag\", \"agg_first_seen_p50_tag\"),\n",
"    (\"agg_first_seen_p50_tag\", \"agg_spread_p50_tag\"),\n",
"    (\"att_first_seen_p50_tag\", \"agg_first_seen_p50_tag\"),\n",
"]:\n",
"    cross_tab_heatmap(df, col_x, col_y).show()"
]
},
{
"cell_type": "markdown",
"id": "9",
"metadata": {},
"source": [
"## Slot Tag Flow (Sankey)\n",
"\n",
"Shows how slots flow across consecutive tag dimensions, e.g. **Proposal Timing → Block Size → Blob Count → Arrival → Broadcast Speed** for blocks. Width of each band is proportional to the number of slots."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10",
"metadata": {},
"outputs": [],
"source": [
"def build_sankey(df: pl.DataFrame, cols: list[str], title: str = \"\") -> go.Figure:\n",
"    \"\"\"Build a Sankey diagram of slot counts flowing between consecutive tag columns.\n",
"\n",
"    Args:\n",
"        df: Frame holding every column named in `cols`.\n",
"        cols: Tag columns, left to right; links connect each column to the next one.\n",
"        title: Figure title.\n",
"\n",
"    Returns:\n",
"        A plotly Figure containing a single Sankey trace.\n",
"    \"\"\"\n",
"    # One node per (column, value) present in df, numbered in TAG_ORDERS order.\n",
"    node_index: dict[tuple[str, str], int] = {}\n",
"    node_labels: list[str] = []\n",
"    for col in cols:\n",
"        seen = set(df[col].unique().to_list())\n",
"        for val in TAG_ORDERS[col]:\n",
"            if val not in seen:\n",
"                continue\n",
"            node_index[(col, val)] = len(node_labels)\n",
"            node_labels.append(f\"{TAG_LABELS[col]}: {short_label(val)}\")\n",
"\n",
"    # One (source, target, count) link per value pair of each adjacent column pair.\n",
"    links: list[tuple[int, int, int]] = []\n",
"    for col_a, col_b in zip(cols, cols[1:]):\n",
"        pair_counts = df.group_by([col_a, col_b]).agg(count=pl.len())\n",
"        for rec in pair_counts.iter_rows(named=True):\n",
"            src = node_index.get((col_a, rec[col_a]))\n",
"            dst = node_index.get((col_b, rec[col_b]))\n",
"            # Pairs involving a value without a node are left out of the diagram.\n",
"            if src is None or dst is None:\n",
"                continue\n",
"            links.append((src, dst, rec[\"count\"]))\n",
"\n",
"    sankey = go.Sankey(\n",
"        arrangement=\"snap\",\n",
"        node=dict(label=node_labels, pad=20, thickness=18, color=\"#6366f1\"),\n",
"        link=dict(\n",
"            source=[src for src, _, _ in links],\n",
"            target=[dst for _, dst, _ in links],\n",
"            value=[count for _, _, count in links],\n",
"            color=\"rgba(99,102,241,0.25)\",\n",
"        ),\n",
"    )\n",
"    fig = go.Figure(sankey)\n",
"    fig.update_layout(title=title, width=1200, height=700)\n",
"    return fig\n",
"\n",
"\n",
"# Block flow. The title lists the dimensions in the same order as the column list,\n",
"# since that is the left-to-right order in which build_sankey links them.\n",
"build_sankey(\n",
"    df,\n",
"    [\"block_proposal_tag\", \"block_size_tag\", \"blob_count_tag\", \"block_p50_arrival_tag\", \"block_p50_spread_tag\"],\n",
"    title=\"Slot Tag Flow: Proposal Timing → Block Size → Blob Count → Arrival → Broadcast Speed\",\n",
").show()\n",
"\n",
"# Data column flow.\n",
"build_sankey(\n",
"    df,\n",
"    [\"block_proposal_tag\", \"blob_count_tag\", \"col_first_seen_p50_tag\", \"col_spread_p50_tag\"],\n",
"    title=\"Data Column Tag Flow (P50): Block Proposal → Blob Count → First Seen → Spread\",\n",
").show()\n",
"\n",
"# Attestation flow.\n",
"build_sankey(\n",
"    df,\n",
"    [\"block_proposal_tag\", \"att_first_seen_p50_tag\", \"att_spread_p50_tag\", \"att_inclusion_p50_tag\"],\n",
"    title=\"Attestation Tag Flow (P50): Block Proposal → First Seen → Spread → Inclusion Delay\",\n",
").show()\n",
"\n",
"# Aggregation flow.\n",
"build_sankey(\n",
"    df,\n",
"    [\"block_proposal_tag\", \"agg_first_seen_p50_tag\", \"agg_spread_p50_tag\"],\n",
"    title=\"Aggregation Tag Flow (P50): Block Proposal → First Seen → Spread\",\n",
").show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
26 changes: 11 additions & 15 deletions pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,11 @@ queries:
description: Block propagation by geographic region from Sentries
output_file: block_propagation_by_region.parquet

block_propagation_by_region_contributoor:
module: queries.block_propagation_contributoor
function: fetch_block_propagation_by_region_contributoor
database: contributoor
description: Block propagation by geographic region from Contributoor nodes
output_file: block_propagation_by_region_contributoor.parquet
slot_tags:
module: queries.slot_tagger
function: fetch_slot_tags
description: Classify each slot for what happened based on the events
output_file: slot_tags.parquet

# ============================================
# Notebook Registry
Expand Down Expand Up @@ -241,22 +240,19 @@ notebooks:
required: true
order: 8

- id: block-propagation-size
title: Block propagation
description: Block propagation timing by size with corrected MEV timing that isolates network latency from block building
- id: slot-tags
title: Slot types
description: Visualize and aggregate metrics based on what happened on each slot
icon: Gauge
source: notebooks/09-block-propagation-size.ipynb
source: notebooks/12-slot-classifier.ipynb
schedule: daily
queries:
- block_propagation_by_size
- block_production_timeline
- block_propagation_by_region
- block_propagation_by_region_contributoor
- slot_tags
parameters:
- name: target_date
type: date
required: true
order: 9
order: 12

# Schedule options: hourly, daily, weekly, manual
# - hourly: Runs every hour, accumulating data throughout the day
Expand Down
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,15 @@ dependencies = [
"boto3>=1.35.0",
"scipy>=1.16.3",
"statsmodels>=0.14.6",
"polars>=1.38.1",
]

[tool.uv]
package = true

[tool.setuptools.packages.find]
include = ["queries*"]

[dependency-groups]
dev = [
"ipykernel>=7.1.0",
Expand Down
Loading
Loading