4444from __future__ import annotations
4545
4646import warnings
47- from typing import List , Optional , Tuple , Union
47+ from typing import Optional , Union
4848
4949import numpy as np
5050import pandas as pd
@@ -320,6 +320,7 @@ def calculate_dii(
320320 f"Non-numeric data found in columns { coerced_cols } . "
321321 "Values were coerced to numeric (non-convertible values become NaN)." ,
322322 UserWarning ,
323+ stacklevel = 2 ,
323324 )
324325
325326 # === COVERAGE WARNING ===
@@ -333,6 +334,7 @@ def calculate_dii(
333334 "DII scores may be less reliable with limited nutrients. "
334335 "Consider adding more nutrient columns if available." ,
335336 UserWarning ,
337+ stacklevel = 2 ,
336338 )
337339
338340 # Filter reference to matched nutrients only
@@ -384,6 +386,8 @@ def _calculate_dii_simple(
384386
385387 # Vectorized DII calculation with explicit float64
386388 total_scores = np .zeros (len (nutrient_data ), dtype = FLOAT_DTYPE )
389+ # Track if each row has at least one valid (non-NaN) contribution
390+ has_valid_data = np .zeros (len (nutrient_data ), dtype = bool )
387391
388392 for _ , ref_row in reference_df .iterrows ():
389393 nutrient = ref_row [NUTRIENT_COL ]
@@ -411,11 +415,16 @@ def _calculate_dii_simple(
411415 # Compute contribution (weight × percentile)
412416 contributions = percentiles * weight
413417
418+ # Track rows with valid data for this nutrient
419+ has_valid_data |= ~ np .isnan (contributions )
420+
414421 # Add to total (handling NaN)
415422 total_scores = np .nansum (
416423 np .stack ([total_scores , contributions ]), axis = 0
417424 ).astype (FLOAT_DTYPE )
418425
426+ # Set DII to NaN for rows with no valid nutrient data
427+ total_scores = np .where (has_valid_data , total_scores , np .nan )
419428 result ["DII_score" ] = total_scores
420429 return result
421430
@@ -464,11 +473,13 @@ def _calculate_dii_detailed(
464473 # Initialize result with ID column if provided
465474 result_data = {}
466475 if id_column and id_column in nutrient_data .columns :
467- result_data [id_column ] = nutrient_data [id_column ].values
476+ result_data [id_column ] = nutrient_data [id_column ].to_numpy ()
468477
469478 # Pre-allocate arrays for all calculations (float64)
470479 n_rows = len (nutrient_data )
471480 total_scores = np .zeros (n_rows , dtype = FLOAT_DTYPE )
481+ # Track if each row has at least one valid (non-NaN) contribution
482+ has_valid_data = np .zeros (n_rows , dtype = bool )
472483
473484 # Calculate for each nutrient
474485 for _ , ref_row in reference_df .iterrows ():
@@ -497,11 +508,16 @@ def _calculate_dii_detailed(
497508 result_data [f"{ nutrient } _percentile" ] = percentiles
498509 result_data [f"{ nutrient } _contribution" ] = contributions
499510
511+ # Track rows with valid data for this nutrient
512+ has_valid_data |= ~ np .isnan (contributions )
513+
500514 # Accumulate total (handling NaN)
501515 total_scores = np .nansum (
502516 np .stack ([total_scores , contributions ]), axis = 0
503517 ).astype (FLOAT_DTYPE )
504518
519+ # Set DII to NaN for rows with no valid nutrient data
520+ total_scores = np .where (has_valid_data , total_scores , np .nan )
505521 result_data ["DII_score" ] = total_scores
506522
507523 return pd .DataFrame (result_data )
0 commit comments