Handle empty HUGGING_FACE_TOKEN gracefully (#422)

MaxGhenis · claude · web-flow · commit 49eabd7f0a4f · 2026-01-17T13:14:34.000-05:00
- Treat empty string same as None (fixes CI with missing secrets) - Check os.isatty(0) before prompting - return None in non-interactive environments instead of hanging on getpass - Update return type to str | None This fixes the 'httpx.LocalProtocolError: Illegal header value b'Bearer '' error that occurs when Dependabot PRs run without access to secrets. Closes PolicyEngine/policyengine-uk#1479 Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/_build/logs/myst.build.json b/_build/logs/myst.build.json
@@ -0,0 +1,29 @@
+{
+  "input": {
+    "files": [
+      "docs"
+    ],
+    "opts": {
+      "execute": false,
+      "pdf": false,
+      "tex": false,
+      "typst": false,
+      "docx": false,
+      "md": false,
+      "xml": false,
+      "meca": false,
+      "cff": false,
+      "site": false,
+      "html": false,
+      "all": false,
+      "doiBib": false,
+      "watch": false,
+      "force": false,
+      "checkLinks": false,
+      "strict": false,
+      "ci": false,
+      "maxSizeWebp": 1572864
+    }
+  },
+  "exports": []
+}
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: patch
+  changes:
+    fixed:
+      - Handle empty HUGGING_FACE_TOKEN gracefully in CI environments. Empty strings are now treated the same as None, and non-interactive environments (like CI) return None instead of hanging on getpass prompt.
diff --git a/policyengine_core/tools/hugging_face.py b/policyengine_core/tools/hugging_face.py
@@ -91,21 +91,32 @@ def download_huggingface_dataset(
     )
 
 
-def get_or_prompt_hf_token() -> str:
+def get_or_prompt_hf_token() -> str | None:
     """
     Either get the Hugging Face token from the environment,
     or prompt the user for it and store it in the environment.
 
     Returns:
-        str: The Hugging Face token.
+        str | None: The Hugging Face token, or None if not available
+        and running non-interactively (e.g., in CI without secrets).
     """
 
     token = os.environ.get("HUGGING_FACE_TOKEN")
-    if token is None:
-        token = getpass(
-            "Enter your Hugging Face token (or set HUGGING_FACE_TOKEN environment variable): "
-        )
-        # Optionally store in env for subsequent calls in same session
-        os.environ["HUGGING_FACE_TOKEN"] = token
+    # Treat empty string same as None (handles CI with missing secrets)
+    if not token:
+        # Check if running interactively before prompting
+        if os.isatty(0):
+            token = getpass(
+                "Enter your Hugging Face token (or set HUGGING_FACE_TOKEN environment variable): "
+            )
+            # Store in env for subsequent calls in same session
+            if token:
+                os.environ["HUGGING_FACE_TOKEN"] = token
+            else:
+                # User entered empty string - return None
+                return None
+        else:
+            # Non-interactive (CI) - return None instead of prompting
+            return None
 
     return token
diff --git a/tests/core/test_microsimulation_person_accessor.py b/tests/core/test_microsimulation_person_accessor.py
@@ -0,0 +1,93 @@
+"""
+Test for Python 3.14 compatibility issue #407.
+
+The person() accessor in Microsimulation should return unweighted values
+(numpy arrays), not weighted MicroSeries. This test verifies that the
+isinstance check correctly identifies Microsimulation instances.
+
+Issue #407 reports that in Python 3.14, person() returns unweighted values
+when it previously returned weighted values in Python 3.13. This test
+ensures consistent behavior across Python versions.
+"""
+
+import numpy as np
+import pytest
+from microdf import MicroSeries
+
+from policyengine_core.country_template import Microsimulation
+
+
+class TestMicrosimulationPersonAccessor:
+    """Tests for person() accessor behavior in Microsimulation."""
+
+    def test_person_accessor_returns_unweighted_in_microsimulation(self):
+        """
+        Verify that person() accessor returns unweighted numpy arrays.
+
+        The person() accessor is used internally in formulas and should return
+        unweighted values for performance. This is the intended behavior per
+        the code comment: "Internal simulation code shouldn't use weights in
+        order to avoid performance slowdowns."
+        """
+        sim = Microsimulation()
+        result = sim.person("salary", "2022-01")
+
+        # The result should be a numpy array, not MicroSeries
+        assert isinstance(result, np.ndarray), (
+            f"Expected numpy.ndarray but got {type(result).__name__}. "
+            "person() should return unweighted arrays for performance."
+        )
+
+    def test_calculate_returns_weighted_microseries(self):
+        """
+        Verify that sim.calculate() returns weighted MicroSeries by default.
+
+        This is the expected behavior for user-facing calculations.
+        """
+        sim = Microsimulation()
+        result = sim.calculate("salary", "2022-01")
+
+        assert isinstance(result, MicroSeries), (
+            f"Expected MicroSeries but got {type(result).__name__}. "
+            "sim.calculate() should return weighted MicroSeries by default."
+        )
+
+    def test_isinstance_check_works_for_microsimulation(self):
+        """
+        Directly test that isinstance check works for Microsimulation.
+
+        This ensures the isinstance check in Population.__call__ correctly
+        identifies Microsimulation instances across Python versions.
+        """
+        from policyengine_core.simulations.microsimulation import (
+            Microsimulation as CoreMicrosimulation,
+        )
+
+        sim = Microsimulation()
+
+        assert isinstance(sim, CoreMicrosimulation), (
+            f"isinstance(sim, Microsimulation) returned False. "
+            f"sim type: {type(sim)}, MRO: {type(sim).__mro__}"
+        )
+
+    def test_person_accessor_kwargs_passed_correctly(self):
+        """
+        Test that the person() accessor passes the correct kwargs to calculate().
+
+        This test verifies that use_weights=False is passed to avoid
+        performance issues in internal calculations.
+        """
+        sim = Microsimulation()
+
+        # Call person() which should pass use_weights=False
+        result_person = sim.person("salary", "2022-01")
+
+        # Call calculate() with use_weights=False directly
+        result_calculate = sim.calculate(
+            "salary", "2022-01", use_weights=False
+        )
+
+        # Both should return numpy arrays with the same values
+        assert isinstance(result_person, np.ndarray)
+        assert isinstance(result_calculate, np.ndarray)
+        np.testing.assert_array_equal(result_person, result_calculate)
diff --git a/tests/core/tools/test_hugging_face.py b/tests/core/tools/test_hugging_face.py
@@ -124,39 +124,65 @@ def test_get_token_from_user_input(self):
         """Test retrieving token via user input when not in environment"""
         test_token = "user_input_token_456"
 
-        # Mock both empty environment and user input
+        # Mock empty environment, interactive mode, and user input
         with patch.dict(os.environ, {}, clear=True):
-            with patch(
-                "policyengine_core.tools.hugging_face.getpass",
-                return_value=test_token,
-            ):
-                result = get_or_prompt_hf_token()
-                assert result == test_token
+            with patch("os.isatty", return_value=True):
+                with patch(
+                    "policyengine_core.tools.hugging_face.getpass",
+                    return_value=test_token,
+                ):
+                    result = get_or_prompt_hf_token()
+                    assert result == test_token
 
-                # Verify token was stored in environment
-                assert os.environ.get("HUGGING_FACE_TOKEN") == test_token
+                    # Verify token was stored in environment
+                    assert os.environ.get("HUGGING_FACE_TOKEN") == test_token
 
     def test_empty_user_input(self):
-        """Test handling of empty user input"""
+        """Test handling of empty user input in interactive mode"""
         with patch.dict(os.environ, {}, clear=True):
-            with patch(
-                "policyengine_core.tools.hugging_face.getpass", return_value=""
-            ):
+            with patch("os.isatty", return_value=True):
+                with patch(
+                    "policyengine_core.tools.hugging_face.getpass",
+                    return_value="",
+                ):
+                    result = get_or_prompt_hf_token()
+                    # Empty input should return None (not stored)
+                    assert result is None
+                    # Empty token should not be stored
+                    assert os.environ.get("HUGGING_FACE_TOKEN") is None
+
+    def test_non_interactive_mode_returns_none(self):
+        """Test that non-interactive mode (CI) returns None without prompting"""
+        with patch.dict(os.environ, {}, clear=True):
+            with patch("os.isatty", return_value=False):
+                with patch(
+                    "policyengine_core.tools.hugging_face.getpass"
+                ) as mock_getpass:
+                    result = get_or_prompt_hf_token()
+                    assert result is None
+                    # getpass should not be called in non-interactive mode
+                    mock_getpass.assert_not_called()
+
+    def test_empty_env_token_treated_as_none(self):
+        """Test that empty string token in env is treated as missing"""
+        with patch.dict(os.environ, {"HUGGING_FACE_TOKEN": ""}, clear=True):
+            with patch("os.isatty", return_value=False):
                 result = get_or_prompt_hf_token()
-                assert result == ""
-                assert os.environ.get("HUGGING_FACE_TOKEN") == ""
+                # Empty string should be treated as None
+                assert result is None
 
     def test_environment_variable_persistence(self):
         """Test that environment variable persists across multiple calls"""
         test_token = "persistence_test_token"
 
-        # First call with no environment variable
+        # First call with no environment variable (interactive mode)
         with patch.dict(os.environ, {}, clear=True):
-            with patch(
-                "policyengine_core.tools.hugging_face.getpass",
-                return_value=test_token,
-            ):
-                first_result = get_or_prompt_hf_token()
+            with patch("os.isatty", return_value=True):
+                with patch(
+                    "policyengine_core.tools.hugging_face.getpass",
+                    return_value=test_token,
+                ):
+                    first_result = get_or_prompt_hf_token()
 
             # Second call should use environment variable
             second_result = get_or_prompt_hf_token()