diff --git a/plugins/pandera/README.md b/plugins/pandera/README.md
new file mode 100644
index 000000000..a400860c3
--- /dev/null
+++ b/plugins/pandera/README.md
@@ -0,0 +1,63 @@
+# Flyte Pandera Plugin
+
+This plugin integrates [Pandera](https://pandera.readthedocs.io/) with [Flyte](https://flyte.org/), enabling automatic runtime validation of pandas DataFrames against Pandera schemas as data flows between tasks.
+
+## Installation
+
+```bash
+pip install flyteplugins-pandera
+```
+
+## Usage
+
+Define a Pandera schema using `DataFrameModel` and use `pandera.typing.DataFrame` as your task's type annotation:
+
+```python
+import flyte
+import pandas as pd
+import pandera as pa
+from pandera.typing import DataFrame, Series
+
+env = flyte.TaskEnvironment(name="my-env")
+
+
+class UserSchema(pa.DataFrameModel):
+ name: Series[str]
+ age: Series[int] = pa.Field(ge=0, le=120)
+ email: Series[str]
+
+
+@env.task
+async def generate_users() -> DataFrame[UserSchema]:
+ return pd.DataFrame({
+ "name": ["Alice", "Bob"],
+ "age": [25, 30],
+ "email": ["alice@example.com", "bob@example.com"],
+ })
+
+
+@env.task
+async def process_users(df: DataFrame[UserSchema]) -> DataFrame[UserSchema]:
+ df["age"] = df["age"] + 1
+ return df
+```
+
+DataFrames are automatically validated against the schema on both input and output. If validation fails, a detailed error report is generated.
+
+## Configuration
+
+Control validation behavior using `ValidationConfig` with `typing.Annotated`:
+
+```python
+from typing import Annotated
+from flyteplugins.pandera import ValidationConfig
+
+@env.task
+async def lenient_task(
+ df: Annotated[DataFrame[UserSchema], ValidationConfig(on_error="warn")]
+) -> DataFrame[UserSchema]:
+ return df
+```
+
+- `on_error="raise"` (default): Raises an exception on validation failure
+- `on_error="warn"`: Logs a warning and continues with the original data
diff --git a/plugins/pandera/pyproject.toml b/plugins/pandera/pyproject.toml
new file mode 100644
index 000000000..f3b48159a
--- /dev/null
+++ b/plugins/pandera/pyproject.toml
@@ -0,0 +1,73 @@
+[project]
+name = "flyteplugins-pandera"
+dynamic = ["version"]
+description = "Pandera data validation plugin for Flyte"
+readme = "README.md"
+authors = [{ name = "Flyte Contributors", email = "admin@flyte.org" }]
+requires-python = ">=3.10"
+dependencies = [
+ "pandera",
+ "flyte",
+ "great_tables",
+]
+
+[project.entry-points."flyte.plugins.types"]
+pandera = "flyteplugins.pandera.transformer:register_pandera_transformer"
+
+[build-system]
+requires = ["setuptools", "setuptools_scm"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools]
+include-package-data = true
+license-files = ["licenses/*.txt", "LICENSE"]
+
+[tool.setuptools.packages.find]
+where = ["src"]
+include = ["flyteplugins*"]
+
+[tool.setuptools_scm]
+root = "../../"
+
+[tool.pytest.ini_options]
+norecursedirs = []
+log_cli = true
+log_cli_level = 20
+markers = []
+asyncio_default_fixture_loop_scope = "function"
+
+[tool.coverage.run]
+branch = true
+
+[tool.ruff]
+line-length = 120
+
+[tool.ruff.lint]
+select = [
+ "E",
+ "W",
+ "F",
+ "I",
+ "PLW",
+ "YTT",
+ "ASYNC",
+ "C4",
+ "T10",
+ "EXE",
+ "ISC",
+ "LOG",
+ "PIE",
+ "Q",
+ "RSE",
+ "FLY",
+ "PGH",
+ "PLC",
+ "PLE",
+ "PLW",
+ "FURB",
+ "RUF",
+]
+ignore = ["PGH003", "PLC0415", "ASYNC240"]
+
+[tool.ruff.lint.per-file-ignores]
+"examples/*" = ["E402"]
diff --git a/plugins/pandera/src/flyteplugins/pandera/__init__.py b/plugins/pandera/src/flyteplugins/pandera/__init__.py
new file mode 100644
index 000000000..2e8759edb
--- /dev/null
+++ b/plugins/pandera/src/flyteplugins/pandera/__init__.py
@@ -0,0 +1,19 @@
+"""
+Pandera data validation plugin for Flyte.
+
+This plugin integrates Pandera's runtime data validation with Flyte's type system,
+enabling automatic validation of pandas DataFrames against Pandera schemas when
+data flows between tasks.
+
+.. autosummary::
+ :template: custom.rst
+ :toctree: generated/
+
+ PanderaTransformer
+ PandasReportRenderer
+ ValidationConfig
+"""
+
+from .config import ValidationConfig as ValidationConfig
+from .renderer import PandasReportRenderer as PandasReportRenderer
+from .transformer import PanderaTransformer as PanderaTransformer
diff --git a/plugins/pandera/src/flyteplugins/pandera/config.py b/plugins/pandera/src/flyteplugins/pandera/config.py
new file mode 100644
index 000000000..68654aa23
--- /dev/null
+++ b/plugins/pandera/src/flyteplugins/pandera/config.py
@@ -0,0 +1,17 @@
+"""Pandera validation configuration."""
+
+from dataclasses import dataclass
+from typing import Literal
+
+
+@dataclass
+class ValidationConfig:
+ """Configuration for Pandera validation behavior.
+
+ Attributes:
+ on_error: Determines how validation errors are handled.
+ "raise" will raise the SchemaError/SchemaErrors exception.
+ "warn" will log a warning and continue with the original data.
+ """
+
+ on_error: Literal["raise", "warn"] = "raise"
diff --git a/plugins/pandera/src/flyteplugins/pandera/renderer.py b/plugins/pandera/src/flyteplugins/pandera/renderer.py
new file mode 100644
index 000000000..96b78cb9e
--- /dev/null
+++ b/plugins/pandera/src/flyteplugins/pandera/renderer.py
@@ -0,0 +1,320 @@
+"""Pandera validation report renderer for Flyte Decks."""
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Optional
+
+from flyte._utils import lazy_module
+
+if TYPE_CHECKING:
+ import great_tables as gt
+ import pandas
+
+ import pandera
+else:
+ gt = lazy_module("great_tables")
+ pandas = lazy_module("pandas")
+ pandera = lazy_module("pandera")
+
+
+SCHEMA_ERROR_KEY = "SCHEMA"
+DATA_ERROR_KEY = "DATA"
+SCHEMA_ERROR_COLUMNS = ["schema", "column", "error_code", "check", "failure_case", "error"]
+DATA_ERROR_COLUMNS = ["schema", "column", "error_code", "check", "index", "failure_case", "error"]
+DATA_ERROR_DISPLAY_ORDER = ["column", "error_code", "percent_valid", "check", "failure_cases", "error"]
+
+DATA_PREVIEW_HEAD = 5
+FAILURE_CASE_LIMIT = 10
+ERROR_COLUMN_MAX_WIDTH = 200
+
+
+@dataclass
+class PandasReport:
+ summary: "pandas.DataFrame"
+ data_preview: "pandas.DataFrame"
+ schema_error_df: Optional["pandas.DataFrame"] = None
+ data_error_df: Optional["pandas.DataFrame"] = None
+
+
+class PandasReportRenderer:
+ """Renders Pandera validation reports as HTML for Flyte Decks."""
+
+ def __init__(self, title: str = "Pandera Error Report"):
+ self._title = title
+
+ def _create_success_report(self, data: "pandas.DataFrame", schema: "pandera.DataFrameSchema") -> PandasReport:
+ summary = pandas.DataFrame(
+ [
+ {"Metadata": "Schema Name", "Value": schema.name},
+ {"Metadata": "Shape", "Value": f"{data.shape[0]} rows x {data.shape[1]} columns"},
+ {"Metadata": "Total schema errors", "Value": 0},
+ {"Metadata": "Total data errors", "Value": 0},
+ {"Metadata": "Schema Object", "Value": f"```\n{schema!r}\n```"},
+ ]
+ )
+ return PandasReport(
+ summary=summary,
+ data_preview=data.head(DATA_PREVIEW_HEAD),
+ )
+
+ @staticmethod
+ def _reshape_long_failure_cases(long_failure_cases: "pandas.DataFrame") -> "pandas.DataFrame":
+ return (
+ long_failure_cases.pivot(
+ index=["schema_context", "check", "index"], columns="column", values="failure_case"
+ )
+ .apply(lambda s: s.to_dict(), axis="columns")
+ .rename("failure_case")
+ .reset_index(["index", "check"])
+ .reset_index(drop=True)[["check", "index", "failure_case"]]
+ )
+
+ def _prepare_data_error_df(
+ self,
+ data: "pandas.DataFrame",
+ data_errors: dict,
+ failure_cases: "pandas.DataFrame",
+ ) -> "pandas.DataFrame":
+ def num_failure_cases(series):
+ return len(series)
+
+ def _failure_cases(series):
+ series = series.astype(str)
+ out = ", ".join(str(x) for x in series.iloc[:FAILURE_CASE_LIMIT])
+ if len(series) > FAILURE_CASE_LIMIT:
+ out += f" ... (+{len(series) - FAILURE_CASE_LIMIT} more)"
+ return out
+
+ data_errors = pandas.concat(pandas.DataFrame(v).assign(error_code=k) for k, v in data_errors.items())
+ long_failure_case_selector = (failure_cases["schema_context"] == "DataFrameSchema") & (
+ failure_cases["column"].notna()
+ )
+ long_failure_cases = failure_cases[long_failure_case_selector]
+
+ data_error_df = [data_errors.merge(failure_cases, how="inner", on=["column", "check"])]
+ if long_failure_cases.shape[0] > 0:
+ reshaped_failure_cases = self._reshape_long_failure_cases(long_failure_cases)
+ long_data_errors = data_errors.assign(
+ column=data_errors.column.where(~(data_errors.column == data_errors.schema), "NA")
+ )
+ data_error_df.append(
+ long_data_errors.merge(reshaped_failure_cases, how="inner", on=["check"]).assign(column="NA")
+ )
+
+ data_error_df = pandas.concat(data_error_df)
+ out_df = (
+ data_error_df[DATA_ERROR_COLUMNS]
+ .groupby(["column", "error_code", "check", "error"])
+ .failure_case.agg([num_failure_cases, _failure_cases])
+ .reset_index()
+ .rename(columns={"_failure_cases": "failure_cases"})
+ .assign(percent_valid=lambda df: 1 - (df["num_failure_cases"] / data.shape[0]))
+ )
+ return out_df
+
+ def _create_error_report(
+ self,
+ data: "pandas.DataFrame",
+ schema: "pandera.DataFrameSchema",
+ error: "pandera.errors.SchemaErrors",
+ ) -> PandasReport:
+ failure_cases = error.failure_cases
+ error_dict = error.args[0]
+
+ schema_errors = error_dict.get(SCHEMA_ERROR_KEY)
+ data_errors = error_dict.get(DATA_ERROR_KEY)
+
+ if schema_errors is None:
+ schema_error_df = None
+ total_schema_errors = 0
+ else:
+ schema_error_df = (
+ pandas.concat(pandas.DataFrame(v).assign(error_code=k) for k, v in schema_errors.items())
+ .merge(failure_cases, how="left", on=["column", "check"])[SCHEMA_ERROR_COLUMNS]
+ .drop(["schema"], axis="columns")
+ )
+ total_schema_errors = schema_error_df.shape[0]
+
+ if data_errors is None:
+ data_error_df = None
+ total_data_errors = 0
+ else:
+ data_error_df = self._prepare_data_error_df(data, data_errors, failure_cases)
+ total_data_errors = data_error_df.shape[0]
+
+ summary = pandas.DataFrame(
+ [
+ {"Metadata": "Schema Name", "Value": schema.name},
+ {"Metadata": "Data Shape", "Value": f"{data.shape[0]} rows x {data.shape[1]} columns"},
+ {"Metadata": "Total schema errors", "Value": total_schema_errors},
+ {"Metadata": "Total data errors", "Value": total_data_errors},
+ {"Metadata": "Schema Object", "Value": f"```\n{error.schema!r}\n```"},
+ ]
+ )
+ return PandasReport(
+ summary=summary,
+ data_preview=data.head(DATA_PREVIEW_HEAD),
+ schema_error_df=schema_error_df,
+ data_error_df=data_error_df,
+ )
+
+ def _format_summary_df(self, df: "pandas.DataFrame") -> str:
+ return (
+ gt.GT(df)
+ .tab_header(
+ title=gt.md("**Summary**"),
+ subtitle="A high-level overview of the schema errors found in the DataFrame.",
+ )
+ .cols_width(cases={"Metadata": "20%", "Value": "80%"})
+ .fmt_markdown(["Value"])
+ .tab_stub(rowname_col="Metadata")
+ .tab_stubhead(label="Metadata")
+ .tab_style(style=gt.style.text(align="left"), locations=gt.loc.header())
+ .tab_style(style=gt.style.fill(color="#f2fae2"), locations=gt.loc.header())
+ .tab_style(
+ style=gt.style.text(weight="bold"),
+ locations=[gt.loc.column_labels(), gt.loc.stubhead(), gt.loc.stub()],
+ )
+ .as_raw_html()
+ )
+
+ def _format_data_preview_df(self, df: "pandas.DataFrame") -> str:
+ return (
+ gt.GT(df)
+ .tab_header(
+ title=gt.md("**Data Preview**"),
+ subtitle=f"A preview of the first {min(DATA_PREVIEW_HEAD, df.shape[0])} rows of the data.",
+ )
+ .tab_style(style=gt.style.text(align="left"), locations=gt.loc.header())
+ .tab_style(style=gt.style.fill(color="#f2fae2"), locations=gt.loc.header())
+ .tab_style(style=gt.style.text(weight="bold"), locations=gt.loc.column_labels())
+ .tab_style(style=gt.style.text(align="left"), locations=[gt.loc.body(), gt.loc.column_labels()])
+ .as_raw_html()
+ )
+
+ @staticmethod
+ def _format_error(x: str) -> str:
+ if len(x) > ERROR_COLUMN_MAX_WIDTH:
+ x = f"{x[:ERROR_COLUMN_MAX_WIDTH]}..."
+ return f"```\n{x}\n```"
+
+ def _format_schema_error_df(self, df: "pandas.DataFrame") -> str:
+ df = df.assign(
+ error=lambda df: df["error"].map(self._format_error),
+ error_code=lambda df: df["error_code"].map(lambda x: f"`{x}`"),
+ check=lambda df: df["check"].map(lambda x: f"`{x}`"),
+ )
+ return (
+ gt.GT(df)
+ .tab_header(
+ title=gt.md("**Schema-level Errors**"),
+ subtitle="Schema-level metadata errors, e.g. column names, dtypes.",
+ )
+ .fmt_markdown(["error_code", "check", "error"])
+ .tab_style(style=gt.style.text(align="left"), locations=gt.loc.header())
+ .tab_style(style=gt.style.fill(color="#f2fae2"), locations=gt.loc.header())
+ .tab_style(style=gt.style.text(weight="bold"), locations=gt.loc.column_labels())
+ .as_raw_html()
+ )
+
+ def _format_data_error_df(self, df: "pandas.DataFrame") -> str:
+ df = df.assign(
+ error=lambda df: df["error"].map(self._format_error),
+ error_code=lambda df: df["error_code"].map(lambda x: f"`{x}`"),
+ check=lambda df: df["check"].map(lambda x: f"`{x}`"),
+ )[DATA_ERROR_DISPLAY_ORDER]
+
+ return (
+ gt.GT(df)
+ .tab_header(
+ title=gt.md("**Data-level Errors**"),
+ subtitle="Data-level value errors, e.g. null values, out-of-range values.",
+ )
+ .fmt_markdown(["error_code", "check", "error"])
+ .fmt_percent("percent_valid", decimals=2)
+ .data_color(columns=["percent_valid"], palette="RdYlGn", domain=[0, 1], alpha=0.2)
+ .tab_stub(groupname_col="column", rowname_col="error_code")
+ .tab_stubhead(label="column")
+ .tab_style(
+ style=gt.style.text(align="left"),
+ locations=[gt.loc.header(), gt.loc.column_labels(), gt.loc.body()],
+ )
+ .tab_style(style=gt.style.text(align="center"), locations=gt.loc.body(columns="percent_valid"))
+ .tab_style(style=gt.style.fill(color="#f2fae2"), locations=gt.loc.header())
+ .tab_style(
+ style=gt.style.text(weight="bold"),
+ locations=[gt.loc.column_labels(), gt.loc.stubhead(), gt.loc.row_groups()],
+ )
+ .tab_style(
+ style=gt.style.fill(color="#f4f4f4"),
+ locations=[gt.loc.row_groups(), gt.loc.stub()],
+ )
+ .as_raw_html()
+ )
+
+ def to_html(
+ self,
+ data: "pandas.DataFrame",
+ schema: "pandera.DataFrameSchema",
+ error: Optional["pandera.errors.SchemaErrors"] = None,
+ ) -> str:
+ error_segments = ""
+ if error is None:
+ report_dfs = self._create_success_report(data, schema)
+ top_message = "Data validation succeeded."
+ else:
+ report_dfs = self._create_error_report(data, schema, error)
+ top_message = "Data validation failed."
+
+ if report_dfs.schema_error_df is not None:
+ error_segments += f"
{self._format_schema_error_df(report_dfs.schema_error_df)}"
+ if report_dfs.data_error_df is not None:
+ error_segments += f"
{self._format_data_error_df(report_dfs.data_error_df)}"
+
+ return f"""
+
+
+
+