Skip to content

Commit 49eabd7

Browse files
MaxGhenis and claude authored
Handle empty HUGGING_FACE_TOKEN gracefully (#422)
- Treat empty string same as None (fixes CI with missing secrets)
- Check os.isatty(0) before prompting
- Return None in non-interactive environments instead of hanging on getpass
- Update return type to str | None

This fixes the `httpx.LocalProtocolError: Illegal header value b'Bearer '` error that occurs when Dependabot PRs run without access to secrets.

Closes PolicyEngine/policyengine-uk#1479

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent f29a63d commit 49eabd7

5 files changed

Lines changed: 192 additions & 29 deletions

File tree

_build/logs/myst.build.json

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
{
2+
"input": {
3+
"files": [
4+
"docs"
5+
],
6+
"opts": {
7+
"execute": false,
8+
"pdf": false,
9+
"tex": false,
10+
"typst": false,
11+
"docx": false,
12+
"md": false,
13+
"xml": false,
14+
"meca": false,
15+
"cff": false,
16+
"site": false,
17+
"html": false,
18+
"all": false,
19+
"doiBib": false,
20+
"watch": false,
21+
"force": false,
22+
"checkLinks": false,
23+
"strict": false,
24+
"ci": false,
25+
"maxSizeWebp": 1572864
26+
}
27+
},
28+
"exports": []
29+
}

changelog_entry.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
- bump: patch
2+
changes:
3+
fixed:
4+
- Handle empty HUGGING_FACE_TOKEN gracefully in CI environments. Empty strings are now treated the same as None, and non-interactive environments (like CI) return None instead of hanging on getpass prompt.

policyengine_core/tools/hugging_face.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -91,21 +91,32 @@ def download_huggingface_dataset(
9191
)
9292

9393

94-
def get_or_prompt_hf_token() -> str:
94+
def get_or_prompt_hf_token() -> str | None:
9595
"""
9696
Either get the Hugging Face token from the environment,
9797
or prompt the user for it and store it in the environment.
9898
9999
Returns:
100-
str: The Hugging Face token.
100+
str | None: The Hugging Face token, or None if not available
101+
and running non-interactively (e.g., in CI without secrets).
101102
"""
102103

103104
token = os.environ.get("HUGGING_FACE_TOKEN")
104-
if token is None:
105-
token = getpass(
106-
"Enter your Hugging Face token (or set HUGGING_FACE_TOKEN environment variable): "
107-
)
108-
# Optionally store in env for subsequent calls in same session
109-
os.environ["HUGGING_FACE_TOKEN"] = token
105+
# Treat empty string same as None (handles CI with missing secrets)
106+
if not token:
107+
# Check if running interactively before prompting
108+
if os.isatty(0):
109+
token = getpass(
110+
"Enter your Hugging Face token (or set HUGGING_FACE_TOKEN environment variable): "
111+
)
112+
# Store in env for subsequent calls in same session
113+
if token:
114+
os.environ["HUGGING_FACE_TOKEN"] = token
115+
else:
116+
# User entered empty string - return None
117+
return None
118+
else:
119+
# Non-interactive (CI) - return None instead of prompting
120+
return None
110121

111122
return token
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""
2+
Test for Python 3.14 compatibility issue #407.
3+
4+
The person() accessor in Microsimulation should return unweighted values
5+
(numpy arrays), not weighted MicroSeries. This test verifies that the
6+
isinstance check correctly identifies Microsimulation instances.
7+
8+
Issue #407 reports that in Python 3.14, person() returns unweighted values
9+
when it previously returned weighted values in Python 3.13. This test
10+
ensures consistent behavior across Python versions.
11+
"""
12+
13+
import numpy as np
14+
import pytest
15+
from microdf import MicroSeries
16+
17+
from policyengine_core.country_template import Microsimulation
18+
19+
class TestMicrosimulationPersonAccessor:
    """Checks on what the person() accessor of Microsimulation returns."""

    def test_person_accessor_returns_unweighted_in_microsimulation(self):
        """
        person() must hand back a plain (unweighted) numpy array.

        Per the code comment quoted by the original author: "Internal
        simulation code shouldn't use weights in order to avoid performance
        slowdowns."
        """
        simulation = Microsimulation()
        values = simulation.person("salary", "2022-01")

        # A numpy array is expected here, not a MicroSeries.
        returned_type = type(values).__name__
        assert isinstance(values, np.ndarray), (
            f"Expected numpy.ndarray but got {returned_type}. "
            "person() should return unweighted arrays for performance."
        )

    def test_calculate_returns_weighted_microseries(self):
        """
        sim.calculate() must return a MicroSeries when called with defaults.

        This is the behaviour expected for user-facing calculations.
        """
        simulation = Microsimulation()
        values = simulation.calculate("salary", "2022-01")

        returned_type = type(values).__name__
        assert isinstance(values, MicroSeries), (
            f"Expected MicroSeries but got {returned_type}. "
            "sim.calculate() should return weighted MicroSeries by default."
        )

    def test_isinstance_check_works_for_microsimulation(self):
        """
        A country-template Microsimulation is an instance of the core class.

        Guards the isinstance check that, per the original test notes, lives
        in Population.__call__ and must behave the same across Python
        versions.
        """
        from policyengine_core.simulations.microsimulation import (
            Microsimulation as CoreMicrosimulation,
        )

        simulation = Microsimulation()
        simulation_type = type(simulation)

        assert isinstance(simulation, CoreMicrosimulation), (
            "isinstance(sim, Microsimulation) returned False. "
            f"sim type: {simulation_type}, MRO: {simulation_type.__mro__}"
        )

    def test_person_accessor_kwargs_passed_correctly(self):
        """
        person() must match calculate(..., use_weights=False).

        Both calls are expected to yield numpy arrays holding equal values.
        """
        simulation = Microsimulation()

        # Through the accessor, which should pass use_weights=False ...
        via_accessor = simulation.person("salary", "2022-01")

        # ... and through calculate() with use_weights=False given explicitly.
        via_calculate = simulation.calculate(
            "salary", "2022-01", use_weights=False
        )

        assert isinstance(via_accessor, np.ndarray)
        assert isinstance(via_calculate, np.ndarray)
        np.testing.assert_array_equal(via_accessor, via_calculate)

tests/core/tools/test_hugging_face.py

Lines changed: 47 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -124,39 +124,65 @@ def test_get_token_from_user_input(self):
124124
"""Test retrieving token via user input when not in environment"""
125125
test_token = "user_input_token_456"
126126

127-
# Mock both empty environment and user input
127+
# Mock empty environment, interactive mode, and user input
128128
with patch.dict(os.environ, {}, clear=True):
129-
with patch(
130-
"policyengine_core.tools.hugging_face.getpass",
131-
return_value=test_token,
132-
):
133-
result = get_or_prompt_hf_token()
134-
assert result == test_token
129+
with patch("os.isatty", return_value=True):
130+
with patch(
131+
"policyengine_core.tools.hugging_face.getpass",
132+
return_value=test_token,
133+
):
134+
result = get_or_prompt_hf_token()
135+
assert result == test_token
135136

136-
# Verify token was stored in environment
137-
assert os.environ.get("HUGGING_FACE_TOKEN") == test_token
137+
# Verify token was stored in environment
138+
assert os.environ.get("HUGGING_FACE_TOKEN") == test_token
138139

def test_empty_user_input(self):
    """Test handling of empty user input in interactive mode"""
    # Empty environment, a pretend TTY, and a prompt that yields "".
    with (
        patch.dict(os.environ, {}, clear=True),
        patch("os.isatty", return_value=True),
        patch(
            "policyengine_core.tools.hugging_face.getpass",
            return_value="",
        ),
    ):
        token = get_or_prompt_hf_token()
        # Empty input is reported as None ...
        assert token is None
        # ... and nothing is written back to the environment.
        assert os.environ.get("HUGGING_FACE_TOKEN") is None

def test_non_interactive_mode_returns_none(self):
    """Test that non-interactive mode (CI) returns None without prompting"""
    # Empty environment and no TTY; getpass is replaced so calls are recorded.
    with (
        patch.dict(os.environ, {}, clear=True),
        patch("os.isatty", return_value=False),
        patch("policyengine_core.tools.hugging_face.getpass") as prompt_spy,
    ):
        token = get_or_prompt_hf_token()
        assert token is None
        # The prompt must never be shown when there is no TTY.
        prompt_spy.assert_not_called()

def test_empty_env_token_treated_as_none(self):
    """Test that empty string token in env is treated as missing"""
    # The variable is present but empty, and there is no TTY to prompt on.
    with (
        patch.dict(os.environ, {"HUGGING_FACE_TOKEN": ""}, clear=True),
        patch("os.isatty", return_value=False),
    ):
        token = get_or_prompt_hf_token()
        # An empty string is handled exactly like an unset variable.
        assert token is None
148173

149174
def test_environment_variable_persistence(self):
150175
"""Test that environment variable persists across multiple calls"""
151176
test_token = "persistence_test_token"
152177

153-
# First call with no environment variable
178+
# First call with no environment variable (interactive mode)
154179
with patch.dict(os.environ, {}, clear=True):
155-
with patch(
156-
"policyengine_core.tools.hugging_face.getpass",
157-
return_value=test_token,
158-
):
159-
first_result = get_or_prompt_hf_token()
180+
with patch("os.isatty", return_value=True):
181+
with patch(
182+
"policyengine_core.tools.hugging_face.getpass",
183+
return_value=test_token,
184+
):
185+
first_result = get_or_prompt_hf_token()
160186

161187
# Second call should use environment variable
162188
second_result = get_or_prompt_hf_token()

0 commit comments

Comments
 (0)