Skip to content

Commit f1ece47

Browse files
committed
Merge dev: bug fixes, type safety, and test improvements
2 parents d812924 + 96403cb commit f1ece47

8 files changed

Lines changed: 13980 additions & 13574 deletions

File tree

data/sample_input.csv

Lines changed: 13555 additions & 13555 deletions
Large diffs are not rendered by default.

src/dii/__main__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
 
 from .calculator import calculate_dii, calculate_dii_detailed
 from .reader import load_nutrient_data, validate_input_file
-from .reference import get_available_nutrients
 from .viewer import display_results, display_nutrients_table
 from . import __version__
 

src/dii/calculator.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
from __future__ import annotations
4545

4646
import warnings
47-
from typing import List, Optional, Tuple, Union
47+
from typing import Optional, Union
4848

4949
import numpy as np
5050
import pandas as pd
@@ -320,6 +320,7 @@ def calculate_dii(
320320
f"Non-numeric data found in columns {coerced_cols}. "
321321
"Values were coerced to numeric (non-convertible values become NaN).",
322322
UserWarning,
323+
stacklevel=2,
323324
)
324325

325326
# === COVERAGE WARNING ===
@@ -333,6 +334,7 @@ def calculate_dii(
333334
"DII scores may be less reliable with limited nutrients. "
334335
"Consider adding more nutrient columns if available.",
335336
UserWarning,
337+
stacklevel=2,
336338
)
337339

338340
# Filter reference to matched nutrients only
@@ -384,6 +386,8 @@ def _calculate_dii_simple(
384386

385387
# Vectorized DII calculation with explicit float64
386388
total_scores = np.zeros(len(nutrient_data), dtype=FLOAT_DTYPE)
389+
# Track if each row has at least one valid (non-NaN) contribution
390+
has_valid_data = np.zeros(len(nutrient_data), dtype=bool)
387391

388392
for _, ref_row in reference_df.iterrows():
389393
nutrient = ref_row[NUTRIENT_COL]
@@ -411,11 +415,16 @@ def _calculate_dii_simple(
411415
# Compute contribution (weight × percentile)
412416
contributions = percentiles * weight
413417

418+
# Track rows with valid data for this nutrient
419+
has_valid_data |= ~np.isnan(contributions)
420+
414421
# Add to total (handling NaN)
415422
total_scores = np.nansum(
416423
np.stack([total_scores, contributions]), axis=0
417424
).astype(FLOAT_DTYPE)
418425

426+
# Set DII to NaN for rows with no valid nutrient data
427+
total_scores = np.where(has_valid_data, total_scores, np.nan)
419428
result["DII_score"] = total_scores
420429
return result
421430

@@ -464,11 +473,13 @@ def _calculate_dii_detailed(
464473
# Initialize result with ID column if provided
465474
result_data = {}
466475
if id_column and id_column in nutrient_data.columns:
467-
result_data[id_column] = nutrient_data[id_column].values
476+
result_data[id_column] = nutrient_data[id_column].to_numpy()
468477

469478
# Pre-allocate arrays for all calculations (float64)
470479
n_rows = len(nutrient_data)
471480
total_scores = np.zeros(n_rows, dtype=FLOAT_DTYPE)
481+
# Track if each row has at least one valid (non-NaN) contribution
482+
has_valid_data = np.zeros(n_rows, dtype=bool)
472483

473484
# Calculate for each nutrient
474485
for _, ref_row in reference_df.iterrows():
@@ -497,11 +508,16 @@ def _calculate_dii_detailed(
497508
result_data[f"{nutrient}_percentile"] = percentiles
498509
result_data[f"{nutrient}_contribution"] = contributions
499510

511+
# Track rows with valid data for this nutrient
512+
has_valid_data |= ~np.isnan(contributions)
513+
500514
# Accumulate total (handling NaN)
501515
total_scores = np.nansum(
502516
np.stack([total_scores, contributions]), axis=0
503517
).astype(FLOAT_DTYPE)
504518

519+
# Set DII to NaN for rows with no valid nutrient data
520+
total_scores = np.where(has_valid_data, total_scores, np.nan)
505521
result_data["DII_score"] = total_scores
506522

507523
return pd.DataFrame(result_data)

src/dii/data/dii_reference.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
 nutrient,weight,global_mean,global_sd
 Alcohol,-0.278,13.98,3.72
-vitamin B12,0.106,5.15,2.7
-vitamin B6,-0.365,1.47,0.74
+Vitamin B12,0.106,5.15,2.7
+Vitamin B6,-0.365,1.47,0.74
 Beta-carotene,-0.584,3718,1720
 Caffeine,-0.11,8.05,6.67
 Carbohydrate,0.097,272.2,40

src/dii/viewer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
 
 import pandas as pd
 
-from .reference import load_reference_table, get_available_nutrients
+from .reference import load_reference_table
 
 
 def display_results(
@@ -61,15 +61,15 @@ def _print_summary(results: pd.DataFrame, detailed: bool = False) -> None:
6161
n = len(scores)
6262

6363
print(f"\nParticipants: {n}")
64-
print(f"\nDII Score Summary:")
64+
print("\nDII Score Summary:")
6565
print(f" Mean: {scores.mean():>8.3f}")
6666
print(f" Std: {scores.std():>8.3f}")
6767
print(f" Min: {scores.min():>8.3f}")
6868
print(f" Max: {scores.max():>8.3f}")
6969
print(f" Median: {scores.median():>8.3f}")
7070

7171
# Interpretation breakdown
72-
print(f"\nScore Distribution:")
72+
print("\nScore Distribution:")
7373
anti_inflammatory = (scores < -1).sum()
7474
neutral = ((scores >= -1) & (scores <= 1)).sum()
7575
pro_inflammatory = (scores > 1).sum()
@@ -79,7 +79,7 @@ def _print_summary(results: pd.DataFrame, detailed: bool = False) -> None:
7979
print(f" Pro-inflammatory (> 1): {pro_inflammatory:>4} ({pro_inflammatory/n*100:>5.1f}%)")
8080

8181
if detailed and n <= 20:
82-
print(f"\nIndividual Scores:")
82+
print("\nIndividual Scores:")
8383
for i, score in enumerate(scores):
8484
interpretation = _interpret_score(score)
8585
print(f" Row {i+1:>3}: {score:>8.3f} ({interpretation})")

src/dii/visualization.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"""
99

1010
from pathlib import Path
11-
from typing import Optional, Union, List
11+
from typing import Optional, Union
1212

1313
import pandas as pd
1414
import numpy as np
@@ -75,7 +75,8 @@ def plot_dii_distribution(
7575
)
7676

7777
# Color bars by category
78-
for i, patch in enumerate(patches):
78+
# Note: BarContainer is iterable but mypy stubs don't reflect this correctly
79+
for i, patch in enumerate(list(patches)): # type: ignore[arg-type]
7980
bin_center = (bins[i] + bins[i+1]) / 2
8081
if bin_center < -1:
8182
patch.set_facecolor('#2ecc71') # Green - anti-inflammatory
@@ -184,11 +185,11 @@ def plot_nutrient_contributions(
184185
fig, ax = plt.subplots(figsize=figsize)
185186

186187
# Colors based on direction
187-
colors = ['#2ecc71' if v < 0 else '#e74c3c' for v in contributions.values]
188+
colors = ['#2ecc71' if v < 0 else '#e74c3c' for v in contributions.to_numpy()]
188189

189190
# Create bars
190191
y_pos = np.arange(len(contributions))
191-
bars = ax.barh(y_pos, contributions.values, color=colors, edgecolor='white', linewidth=0.5)
192+
ax.barh(y_pos, contributions.values, color=colors, edgecolor='white', linewidth=0.5)
192193

193194
# Labels
194195
ax.set_yticks(y_pos)
@@ -273,9 +274,10 @@ def plot_dii_categories_pie(
273274

274275
colors = ['#2ecc71', '#f39c12', '#e74c3c'][:len(categories)]
275276

276-
wedges, texts, autotexts = ax.pie(
277-
categories.values(),
278-
labels=categories.keys(),
277+
# Note: ax.pie returns 3 values when autopct is provided, but stubs don't reflect this
278+
wedges, texts, autotexts = ax.pie( # type: ignore[misc]
279+
list(categories.values()),
280+
labels=list(categories.keys()),
279281
colors=colors,
280282
autopct=lambda pct: f'{pct:.1f}%\n({int(pct/100*len(scores)):,})',
281283
startangle=90,

templates/input_template.csv

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
-participant_id,Alcohol,vitamin B12,vitamin B6,Beta-carotene,Caffeine,Carbohydrate,Cholesterol,Energy,Total fat,Fiber,Folic acid,Iron,Magnesium,MUFA,Niacin,n-3 fatty acid,n-6 fatty acid,Protein,PUFA,Riboflavin,Saturated fat,Selenium,Thiamin,Trans fat,Vitamin A,Vitamin C,Vitamin D,Vitamin E,Zinc
+participant_id,Alcohol,Vitamin B12,Vitamin B6,Beta-carotene,Caffeine,Carbohydrate,Cholesterol,Energy,Total fat,Fiber,Folic acid,Iron,Magnesium,MUFA,Niacin,n-3 fatty acid,n-6 fatty acid,Protein,PUFA,Riboflavin,Saturated fat,Selenium,Thiamin,Trans fat,Vitamin A,Vitamin C,Vitamin D,Vitamin E,Zinc
 1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
 2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
 3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-

0 commit comments

Comments
 (0)