From faa0c0eb6a90e2092f3fb0c9c4725dd3b9797dad Mon Sep 17 00:00:00 2001 From: SibtainOcn <224004177+SibtainOcn@users.noreply.github.com> Date: Fri, 6 Mar 2026 02:55:01 +0530 Subject: [PATCH 1/7] fix: exclude JWT token from workload repr to prevent log exposure Prevents JWT tokens from leaking into task logs by setting repr=False on the token field in BaseWorkloadSchema. When workload objects are logged (e.g. in execute_workload.py), Pydantic's auto-generated __repr__ previously included the raw JWT token string. This is a security concern as tokens should never appear in log output. The fix uses Pydantic's Field(repr=False) to exclude the token from string representations while keeping it fully accessible as an attribute. Fixes: #62428 --- airflow-core/newsfragments/62428.bugfix.rst | 1 + .../src/airflow/executors/workloads/base.py | 4 +- .../tests/unit/executors/test_workloads.py | 43 +++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 airflow-core/newsfragments/62428.bugfix.rst diff --git a/airflow-core/newsfragments/62428.bugfix.rst b/airflow-core/newsfragments/62428.bugfix.rst new file mode 100644 index 0000000000000..048ba18893be4 --- /dev/null +++ b/airflow-core/newsfragments/62428.bugfix.rst @@ -0,0 +1 @@ +Prevent JWT tokens from appearing in task logs by excluding the token field from workload object representations. diff --git a/airflow-core/src/airflow/executors/workloads/base.py b/airflow-core/src/airflow/executors/workloads/base.py index cf622209d67ba..97cf16ebaf64d 100644 --- a/airflow-core/src/airflow/executors/workloads/base.py +++ b/airflow-core/src/airflow/executors/workloads/base.py @@ -22,7 +22,7 @@ from abc import ABC from typing import TYPE_CHECKING -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field if TYPE_CHECKING: from airflow.api_fastapi.auth.tokens import JWTGenerator @@ -69,7 +69,7 @@ class BaseWorkloadSchema(BaseModel): model_config = ConfigDict(populate_by_name=True) - token: str + token: str = Field(repr=False) """The identity token for this workload""" @staticmethod diff --git a/airflow-core/tests/unit/executors/test_workloads.py b/airflow-core/tests/unit/executors/test_workloads.py index 8ca86f9704b4b..47dfd674da82d 100644 --- a/airflow-core/tests/unit/executors/test_workloads.py +++ b/airflow-core/tests/unit/executors/test_workloads.py @@ -25,3 +25,46 @@ def test_task_instance_alias_keeps_backwards_compat(): assert TaskInstance is TaskInstanceDTO assert workloads.TaskInstance is TaskInstanceDTO assert workloads.TaskInstanceDTO is TaskInstanceDTO + + +def test_token_excluded_from_workload_repr(): + """Ensure JWT tokens do not leak into log output via repr(). + + Regression test for https://github.com/apache/airflow/issues/62428 + """ + from pathlib import PurePosixPath + from uuid import uuid4 + + from airflow.executors.workloads.base import BundleInfo + from airflow.executors.workloads.task import ExecuteTask, TaskInstanceDTO + + fake_token = "eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.secret_payload.signature" + ti = TaskInstanceDTO( + id=uuid4(), + dag_version_id=uuid4(), + task_id="test_task", + dag_id="test_dag", + run_id="test_run", + try_number=1, + map_index=-1, + pool_slots=1, + queue="default", + priority_weight=1, + ) + workload = ExecuteTask( + ti=ti, + dag_rel_path=PurePosixPath("test_dag.py"), + token=fake_token, + bundle_info=BundleInfo(name="dags-folder", version=None), + log_path="test.log", + ) + + workload_repr = repr(workload) + + # Token MUST NOT appear in repr (prevents leaking into logs) + assert fake_token not in workload_repr, ( + f"JWT token leaked into repr! Found token in: {workload_repr}" + ) + # But token should still be accessible as an attribute + assert workload.token == fake_token + From ec00988eac4a4168b34adbdaacc20e82ea1a004c Mon Sep 17 00:00:00 2001 From: SibtainOcn <224004177+SibtainOcn@users.noreply.github.com> Date: Fri, 6 Mar 2026 03:40:40 +0530 Subject: [PATCH 2/7] fix: move imports to top of file and simplify docstring per review --- .../tests/unit/executors/test_workloads.py | 17 +++--- test_fix_verification.py | 58 +++++++++++++++++++ 2 files changed, 65 insertions(+), 10 deletions(-) create mode 100644 test_fix_verification.py diff --git a/airflow-core/tests/unit/executors/test_workloads.py b/airflow-core/tests/unit/executors/test_workloads.py index 47dfd674da82d..c9738ecb53796 100644 --- a/airflow-core/tests/unit/executors/test_workloads.py +++ b/airflow-core/tests/unit/executors/test_workloads.py @@ -17,8 +17,13 @@ # under the License. from __future__ import annotations +from pathlib import PurePosixPath +from uuid import uuid4 + from airflow.executors import workloads from airflow.executors.workloads import TaskInstance, TaskInstanceDTO +from airflow.executors.workloads.base import BundleInfo +from airflow.executors.workloads.task import ExecuteTask def test_task_instance_alias_keeps_backwards_compat(): @@ -28,16 +33,7 @@ def test_task_instance_alias_keeps_backwards_compat(): def test_token_excluded_from_workload_repr(): - """Ensure JWT tokens do not leak into log output via repr(). - - Regression test for https://github.com/apache/airflow/issues/62428 - """ - from pathlib import PurePosixPath - from uuid import uuid4 - - from airflow.executors.workloads.base import BundleInfo - from airflow.executors.workloads.task import ExecuteTask, TaskInstanceDTO - + """Ensure JWT tokens do not leak into log output via repr().""" fake_token = "eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.secret_payload.signature" ti = TaskInstanceDTO( id=uuid4(), @@ -68,3 +64,4 @@ def test_token_excluded_from_workload_repr(): # But token should still be accessible as an attribute assert workload.token == fake_token + diff --git a/test_fix_verification.py b/test_fix_verification.py new file mode 100644 index 0000000000000..dbc9a20cf87d7 --- /dev/null +++ b/test_fix_verification.py @@ -0,0 +1,58 @@ +""" +Standalone verification that our fix works. +Tests that Pydantic Field(repr=False) hides the JWT token from repr(). +This proves the fix for Apache Airflow issue #62428. +""" +from pydantic import BaseModel, ConfigDict, Field + + +# ---- BEFORE the fix (original code) ---- +class BaseWorkloadSchema_BEFORE(BaseModel): + model_config = ConfigDict(populate_by_name=True) + token: str # No repr=False — token LEAKS into repr + + +# ---- AFTER the fix (our code) ---- +class BaseWorkloadSchema_AFTER(BaseModel): + model_config = ConfigDict(populate_by_name=True) + token: str = Field(repr=False) # repr=False — token HIDDEN + + +FAKE_JWT = "eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.secret_payload.signature" + + +def test_BEFORE_fix_token_leaks(): + """BEFORE our fix: token appears in repr (THE BUG).""" + obj = BaseWorkloadSchema_BEFORE(token=FAKE_JWT) + r = repr(obj) + print(f"\n[BEFORE FIX] repr = {r}") + assert FAKE_JWT in r, "Expected token to LEAK (this is the bug)" + print("[BEFORE FIX] ❌ Token LEAKS into repr — this is the security bug!") + + +def test_AFTER_fix_token_hidden(): + """AFTER our fix: token is hidden from repr (THE FIX).""" + obj = BaseWorkloadSchema_AFTER(token=FAKE_JWT) + r = repr(obj) + print(f"\n[AFTER FIX] repr = {r}") + assert FAKE_JWT not in r, f"Token still leaking! repr = {r}" + print("[AFTER FIX] ✅ Token is HIDDEN from repr — fix works!") + + # But token is still accessible as attribute + assert obj.token == FAKE_JWT + print("[AFTER FIX] ✅ Token still accessible via obj.token — functionality preserved!") + + +if __name__ == "__main__": + print("=" * 60) + print("VERIFICATION: Airflow Issue #62428 Fix") + print("=" * 60) + + test_BEFORE_fix_token_leaks() + print() + test_AFTER_fix_token_hidden() + + print() + print("=" * 60) + print("ALL TESTS PASSED ✅ — Fix is correct!") + print("=" * 60) From fe488e2e73ac470097ca060ffe7ac96209d3d7dd Mon Sep 17 00:00:00 2001 From: SibtainOcn <224004177+SibtainOcn@users.noreply.github.com> Date: Fri, 6 Mar 2026 04:20:06 +0530 Subject: [PATCH 3/7] fix: remove local verification script from PR --- test_fix_verification.py | 58 ---------------------------------------- 1 file changed, 58 deletions(-) delete mode 100644 test_fix_verification.py diff --git a/test_fix_verification.py b/test_fix_verification.py deleted file mode 100644 index dbc9a20cf87d7..0000000000000 --- a/test_fix_verification.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -Standalone verification that our fix works. -Tests that Pydantic Field(repr=False) hides the JWT token from repr(). -This proves the fix for Apache Airflow issue #62428. -""" -from pydantic import BaseModel, ConfigDict, Field - - -# ---- BEFORE the fix (original code) ---- -class BaseWorkloadSchema_BEFORE(BaseModel): - model_config = ConfigDict(populate_by_name=True) - token: str # No repr=False — token LEAKS into repr - - -# ---- AFTER the fix (our code) ---- -class BaseWorkloadSchema_AFTER(BaseModel): - model_config = ConfigDict(populate_by_name=True) - token: str = Field(repr=False) # repr=False — token HIDDEN - - -FAKE_JWT = "eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.secret_payload.signature" - - -def test_BEFORE_fix_token_leaks(): - """BEFORE our fix: token appears in repr (THE BUG).""" - obj = BaseWorkloadSchema_BEFORE(token=FAKE_JWT) - r = repr(obj) - print(f"\n[BEFORE FIX] repr = {r}") - assert FAKE_JWT in r, "Expected token to LEAK (this is the bug)" - print("[BEFORE FIX] ❌ Token LEAKS into repr — this is the security bug!") - - -def test_AFTER_fix_token_hidden(): - """AFTER our fix: token is hidden from repr (THE FIX).""" - obj = BaseWorkloadSchema_AFTER(token=FAKE_JWT) - r = repr(obj) - print(f"\n[AFTER FIX] repr = {r}") - assert FAKE_JWT not in r, f"Token still leaking! repr = {r}" - print("[AFTER FIX] ✅ Token is HIDDEN from repr — fix works!") - - # But token is still accessible as attribute - assert obj.token == FAKE_JWT - print("[AFTER FIX] ✅ Token still accessible via obj.token — functionality preserved!") - - -if __name__ == "__main__": - print("=" * 60) - print("VERIFICATION: Airflow Issue #62428 Fix") - print("=" * 60) - - test_BEFORE_fix_token_leaks() - print() - test_AFTER_fix_token_hidden() - - print() - print("=" * 60) - print("ALL TESTS PASSED ✅ — Fix is correct!") - print("=" * 60) From 5d341f8243b364fd0475b1e68cc3cb3899e3f5a4 Mon Sep 17 00:00:00 2001 From: SibtainOcn <224004177+SibtainOcn@users.noreply.github.com> Date: Fri, 6 Mar 2026 06:22:25 +0530 Subject: [PATCH 4/7] fix: rename newsfragment to use PR number per review --- airflow-core/newsfragments/{62428.bugfix.rst => 62964.bugfix.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename airflow-core/newsfragments/{62428.bugfix.rst => 62964.bugfix.rst} (100%) diff --git a/airflow-core/newsfragments/62428.bugfix.rst b/airflow-core/newsfragments/62964.bugfix.rst similarity index 100% rename from airflow-core/newsfragments/62428.bugfix.rst rename to airflow-core/newsfragments/62964.bugfix.rst From 9c7ed280adbebabc1fad75b07f492f52721ee0c6 Mon Sep 17 00:00:00 2001 From: SibtainOcn <224004177+SibtainOcn@users.noreply.github.com> Date: Fri, 6 Mar 2026 06:55:34 +0530 Subject: [PATCH 5/7] style: fix assert formatting to match Airflow linter --- airflow-core/tests/unit/executors/test_workloads.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/airflow-core/tests/unit/executors/test_workloads.py b/airflow-core/tests/unit/executors/test_workloads.py index c9738ecb53796..3e2abdc51b2d6 100644 --- a/airflow-core/tests/unit/executors/test_workloads.py +++ b/airflow-core/tests/unit/executors/test_workloads.py @@ -58,9 +58,7 @@ def test_token_excluded_from_workload_repr(): workload_repr = repr(workload) # Token MUST NOT appear in repr (prevents leaking into logs) - assert fake_token not in workload_repr, ( - f"JWT token leaked into repr! Found token in: {workload_repr}" - ) + assert fake_token not in workload_repr, f"JWT token leaked into repr! Found token in: {workload_repr}" # But token should still be accessible as an attribute assert workload.token == fake_token From 1629990a764581369eaedbab593a766af52da7b7 Mon Sep 17 00:00:00 2001 From: SibtainOcn <224004177+SibtainOcn@users.noreply.github.com> Date: Fri, 6 Mar 2026 16:53:31 +0530 Subject: [PATCH 6/7] style: remove trailing blank lines per pre-commit hook --- airflow-core/tests/unit/executors/test_workloads.py | 1 - 1 file changed, 1 deletion(-) diff --git a/airflow-core/tests/unit/executors/test_workloads.py b/airflow-core/tests/unit/executors/test_workloads.py index 3e2abdc51b2d6..c40c9975d30cd 100644 --- a/airflow-core/tests/unit/executors/test_workloads.py +++ b/airflow-core/tests/unit/executors/test_workloads.py @@ -62,4 +62,3 @@ def test_token_excluded_from_workload_repr(): # But token should still be accessible as an attribute assert workload.token == fake_token - From 46bb29f9fe25c8011da38abc3aacacba2b25ead2 Mon Sep 17 00:00:00 2001 From: SibtainOcn <224004177+SibtainOcn@users.noreply.github.com> Date: Fri, 6 Mar 2026 21:01:04 +0530 Subject: [PATCH 7/7] style: remove trailing blank line to pass static checks --- airflow-core/tests/unit/executors/test_workloads.py | 1 - 1 file changed, 1 deletion(-) diff --git a/airflow-core/tests/unit/executors/test_workloads.py b/airflow-core/tests/unit/executors/test_workloads.py index c40c9975d30cd..1a67ab96d4073 100644 --- a/airflow-core/tests/unit/executors/test_workloads.py +++ b/airflow-core/tests/unit/executors/test_workloads.py @@ -61,4 +61,3 @@ def test_token_excluded_from_workload_repr(): assert fake_token not in workload_repr, f"JWT token leaked into repr! Found token in: {workload_repr}" # But token should still be accessible as an attribute assert workload.token == fake_token -