diff --git a/rio_stac/stac.py b/rio_stac/stac.py index 1aaab81..1745713 100644 --- a/rio_stac/stac.py +++ b/rio_stac/stac.py @@ -159,7 +159,8 @@ def get_eobands_info( def _get_stats(arr: numpy.ma.MaskedArray, **kwargs: Any) -> Dict: """Calculate array statistics.""" # Avoid non masked nan/inf values - numpy.ma.fix_invalid(arr, copy=False) + arr = numpy.ma.fix_invalid(arr, copy=True) + sample, edges = numpy.histogram(arr[~arr.mask]) return { "statistics": { diff --git a/tests/fixtures/dataset_missing_nodata_nan.tif b/tests/fixtures/dataset_missing_nodata_nan.tif new file mode 100644 index 0000000..db80b94 Binary files /dev/null and b/tests/fixtures/dataset_missing_nodata_nan.tif differ diff --git a/tests/test_create_item.py b/tests/test_create_item.py index a1fb15a..01c3cd4 100644 --- a/tests/test_create_item.py +++ b/tests/test_create_item.py @@ -4,11 +4,12 @@ import json import os +import numpy import pystac import pytest import rasterio -from rio_stac.stac import create_stac_item +from rio_stac.stac import create_stac_item, get_raster_info from .conftest import requires_hdf4, requires_hdf5 @@ -356,3 +357,18 @@ def test_json_serialization(): assert item.validate() item_dict = item.to_dict() assert json.dumps(item_dict) + + +def test_stats_with_nan_missing_nodata(): + """Stats should ignore nan/inf values. + + Ref: https://github.com/developmentseed/rio-stac/issues/70 + """ + src_path = os.path.join(PREFIX, "dataset_missing_nodata_nan.tif") + with rasterio.open(src_path) as src: + arr = src.read(masked=True) + assert numpy.isnan(arr.max().item()) + + info = get_raster_info(src) + assert info[0]["statistics"]["minimum"] > 0 + assert info[0]["statistics"]["maximum"] > 0