From cd1211980b7f7430d5a7ad90d877e06b65aaad4c Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Mon, 8 Sep 2025 12:28:52 +0200 Subject: [PATCH 1/4] custom histogram bins/range and fall back --- rio_stac/stac.py | 59 +++++++++++++----- ...5631_N0400_R094_T31TCN_20220722T171159.tif | Bin 0 -> 20651 bytes tests/test_create_item.py | 35 +++++++++++ 3 files changed, 80 insertions(+), 14 deletions(-) create mode 100644 tests/fixtures/S2A_MSIL2A_20220722T105631_N0400_R094_T31TCN_20220722T171159.tif diff --git a/rio_stac/stac.py b/rio_stac/stac.py index 1aaab81..1728dd9 100644 --- a/rio_stac/stac.py +++ b/rio_stac/stac.py @@ -5,7 +5,7 @@ import os import warnings from contextlib import ExitStack -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Sequence, Tuple, Union import numpy import pystac @@ -156,12 +156,16 @@ def get_eobands_info( return eo_bands -def _get_stats(arr: numpy.ma.MaskedArray, **kwargs: Any) -> Dict: +def _get_stats( + arr: numpy.ma.MaskedArray, + bins: Union[int, str, Sequence] = 10, + range: Optional[Tuple[float, float]] = None, +) -> Dict: """Calculate array statistics.""" # Avoid non masked nan/inf values numpy.ma.fix_invalid(arr, copy=False) - sample, edges = numpy.histogram(arr[~arr.mask]) - return { + + stats = { "statistics": { "mean": arr.mean().item(), "minimum": arr.min().item(), @@ -170,19 +174,35 @@ def _get_stats(arr: numpy.ma.MaskedArray, **kwargs: Any) -> Dict: "valid_percent": float( numpy.count_nonzero(~arr.mask) / float(arr.data.size) * 100 ), - }, - "histogram": { - "count": len(edges), - "min": float(edges.min()), - "max": float(edges.max()), - "buckets": sample.tolist(), - }, + } + } + + try: + sample, edges = numpy.histogram(arr[~arr.mask], bins=bins, range=range) + + except ValueError: + _, counts = numpy.unique(arr[~arr.mask], return_counts=True) + warnings.warn( + f"Could not calculate the histogram, fall back to automatic bin={len(counts) + 1}.", + UserWarning, + ) + sample, edges = numpy.histogram(arr[~arr.mask], bins=len(counts) + 1) + + stats["histogram"] = { + "count": len(edges), + "min": float(edges.min()), + "max": float(edges.max()), + "buckets": sample.tolist(), } + return stats + def get_raster_info( # noqa: C901 src_dst: Union[DatasetReader, DatasetWriter, WarpedVRT, MemoryFile], max_size: int = 1024, + histogram_bins: Union[int, str, Sequence] = 10, + histogram_range: Optional[Tuple[float, float]] = None, ) -> List[Dict]: """Get raster metadata. @@ -230,7 +250,11 @@ def get_raster_info( # noqa: C901 value["unit"] = src_dst.units[band - 1] value.update( - _get_stats(src_dst.read(indexes=band, out_shape=(height, width), masked=True)) + _get_stats( + src_dst.read(indexes=band, out_shape=(height, width), masked=True), + bins=histogram_bins, + range=histogram_range, + ) ) meta.append(value) @@ -271,7 +295,7 @@ def get_media_type( elif driver == "PNG": return pystac.MediaType.PNG - warnings.warn("Could not determine the media type from GDAL driver.") + warnings.warn("Could not determine the media type from GDAL driver.", UserWarning) return None @@ -295,6 +319,8 @@ def create_stac_item( geom_densify_pts: int = 0, geom_precision: int = -1, geographic_crs: rasterio.crs.CRS = EPSG_4326, + histogram_bins: Union[int, str, Sequence] = 10, + histogram_range: Optional[Tuple[float, float]] = None, ) -> pystac.Item: """Create a Stac Item. @@ -392,7 +418,12 @@ def create_stac_item( ) raster_info = { - "raster:bands": get_raster_info(dataset, max_size=raster_max_size) + "raster:bands": get_raster_info( + dataset, + max_size=raster_max_size, + histogram_bins=histogram_bins, + histogram_range=histogram_range, + ) } eo_info: Dict[str, List] = {} diff --git a/tests/fixtures/S2A_MSIL2A_20220722T105631_N0400_R094_T31TCN_20220722T171159.tif b/tests/fixtures/S2A_MSIL2A_20220722T105631_N0400_R094_T31TCN_20220722T171159.tif new file mode 100644 index 0000000000000000000000000000000000000000..330d01f3f9a26a6c24c934ac602857e2e2d0b788 GIT binary patch literal 20651 zcmeI2d0bOh_P~R5WJZg$jztAoMu%G3MhFA~#NZ$zWmm!`2oXZS2uUO%i4Yd8MXE@s z6+{I@i-5=`8kQ0Q4hpHFpe$htNhlG*9wUY|@P04W_S5fdTXMTB*+>`U}JNMl0 zJ@=iHe4dlj=aBml2*h!poht_I<>_tju@z4~^ODu;0Ve0i*1>Zm+9{y$i;}bDx7J%G1La<7VgV;rUkU>R_i- zD~)07&=_wgcQ5$cQ(m5SPOjiNS39&929DW#$jbqZ@pL%gVCUtqZ=H*~hp#)v$=%+= z?SP$^(_U8xjKe-BFDDOol)J||L&zowL`@x{2F_pu_)>c-YlHF{P*&=yz?154^?wFG zuYD^kuleMytn9b#cY4(CyWT1p8=$_S(S# z(4efeJpsxaPJakd2jwkb|F6IK5V97OO+mRlSsh{s%11!CB6$r&AC#|y^2wWPARlQ! zAb$qOJq5}ifO=vu5qyV${a62c(_zIob#VOE{&p(^zzr8292y)QY`f0TaJN%5j$lX( zCg4!pgRxjV1s{zk5i#It+;&?tQ!_KDDFSL{<^_YY*@DSL_ro!;J$k4Y|Lf|kM3}FU?!eH<)DBR4< z8X62o;GpJVmRJj1C=3>Aie1@CIF3ZXMU!JNSTY54oftu)6IbquCj^J%z>Z{G6ctB_ zemgSE!Ww36hO>mitwSuJ@GzVe)XE%Y3WbM+nqxy@p)f?qD#U)ZN1=vLuw?x4H%s*I zL%~qM1;G<>kr*>fq**Y=0d0rb@8oXhy0Z5kKAzylMBBL?aCLHb#MrqyZnt%Jume?I zD_?M&OrqgKapdi`I7;x!UC1~JDUz!6``zTo6lGvyXgD(E5Bq|%qL9Er|8;gNk59po zX?QFSqYU)zw!iWGMZ5AP`y9~ro=yjp_n_^DlwiW~NN|goZZ{;8ltL)(x0Tp#izNk9 zqJtyf!wzgkJ0X!&+>2 zF*AD~J1fU>Fe&jqEo0*ZlW3fByX+zX-p!E2h=ol-EDmVh4UW`Ij-Y zvVC)?y{QqvG!CY3r3@AdZ`w5-dv$}ZsY>(za zjWwHhjCDyDFAtcNA-)b$)7<#{HE`M}BI=4AW?U1{{ zlfMh?Qkm-ol9MjabS-v8*(G-vB$G;Xj(PH|wn${RVu4QiSX=d?!3m}T2IEJG3=Cds ziO6evmJuaHP0AXzT%!6B{vi#AnrcRPF|vUiJz4(F!f^X)4ip)7+?qcwJC)SoQEu{O zq!XJGba<;kmoIMOJIm-x0U?U)khla|Un(*@wrcsG0SM{-^Cc$Hx&gkI**~Dhrh>M% za9Lar1LLOW4H3fENdLi%t$0L2D=Tt9K40dNnQxx6_)_aF@)eqaJu`}ug+`;4T5b0c zzOYxuCSe(0T9t1yNhmNnGy8dSi@Il;bnw;dk|>}KphsfkgQu={0^gt_oA^zhi)TCLa> zoc~n?RNM>i-dvkn`o))ez53mCRi9sDGJEFv`6s56hI>dpu(b+lXIEAX@Absj6){}W zOK-gIxU7qoMj!u{+3Xodu&0ElNpEh&(??u7?lLr9u*)*LHV0w4EAIimd?=<{UxM*G zmnS^NY=7QV6B5{`sRM`dhp*I$XYV)RXQk4dAMG6{<}Z3GmQP+9Sw0L4N+iurN2MMT zEy{s@4b-oG{@?Z_bpA*V3nj_d#{gfT5y$&Wn0`@?#OUzrKXQ#FBqZ2^j?(LHs=CD}HBe`+2oW!3*%0=n{ zrkHb$ciRg@qV`_h32JB`2X+S*@cL}rtOsR${rZ40%UMm6!(AWKGhCt`{cvdnb;H63 zb)oi@bjsQyYayF>U0Ip0a`b$!#dG41)-Nn-B?R+Ef(33@&@A@a3Jdg(R^>}$EP zaV=bU*pB8GsQMWCn<4+q}-6Wmh%bSsxH$pKr9V1fOh0-h?6RzmuNn&#*}V zgqqvtGo0a4p7x%sEFpi6!^)y8bwTYbzkn|0{ymaX^y}-|3#UkTt`~E2S@ezZ#k+yap>#JRs(N>tyYwliXvfqwfi@=gu2HPV(bV!i-_ep2 z7@oYmLgIj<^bg&;Iy<^cGs%L>`{WYSb*Z6yr_bQaGvwWielF38Wm{4;*V`ZymP9$1 zmuL-jVtQvl5#q#yvk^$T8^ddmB`3gwxgmAa2O@S0p_8BT+#c&Eig{1j(z;8uO5u@G z-MNSN^kl>92~j;CHyzdQ*Le`+B)E?*yPTfGdF3OxuQmC{{?UEP&onk(ss*D_FGsI| zKq`A!RG2PildAVg|56%7N$ZrgT+6wvDb1J#YG)!xCg!?m@fuYQnZ=*WQmzfn$!(iz zV#d~X#JdT)) zU-7m~4_9g6Y-pdLuN1}>*8@(1f2@xCB(%4WQ<>J?m57qgbuA>ar9P>gwq4wpDJjoc zjuB6Ow52M>%5$;<<$Hx)0nLU|G4bEwOZ)eRIBW;OXXk0cs~aEG1dORx2E0*T%cZ9+>!q zG~FlHQJrB|HL9D-8**tt;sR@;xm?1oluizp9hLU$^*;ZYPB@y9v%#eK8@##bO9B*I0J+(WS`t_glc6WUozC;N)te}q~6RV$o-6|L`c=A&Mb&L>D zfl&Ugv&wPDD4^lWY*ejeXFd}H(9wYOTMsYstFuE^!1tY9Uo1Cta@nLz8x1_&zerTOxoF5`^Kw)itY#-3B&EYmSA?843NPkppn7Ed_3DdRIeHqJ%3t0_u! z>)}Z7P|eQ|-|T!vi?pLmbgknq>hV0$84CuxO;V-c+^LM3h3n+HSTEQaOutSV%?(xW z=zjGYr+$}6Ux z7(ZW3?M-j4OGC`uJ_A-(uLR9f@4?Z}xEqo;VZm~4TijQDn#f@P8NU9srF6r>VjbYS z*lL{VGg#p!efsU|oRlNH||-Eww~E?LC84a+@~qEFkgJWb^0~1N*I9Z zlWEsWMNby31gX%im+1?$VviT>0+jt)QVb4Rc;kxUgzsZ&G-&6&3_Uni~}S9Aga zz!9TMb%Ey`One80_ARoef{@5ttcT$wSGMn`@Gva}Tog}A5Neiy$!y-uXn8J!>}pg0 zX7|`r-V}k-aTy1-2i%3i#e~X@$$AHwO^(`mF~=ja=a*RQ7q3Qpr}jLiOLl$5mgk{K z$J^Ap=D6a=2e%CEm3WwMd+I0 zaeQ~4LLnF4kmf|^dC$9cji4U|O>IN{mh1yuK@*^%U zvKY$|9ruRHhuP`|jB+l*W^%u4Nps`HmWuKDB3&y+8w)vCv-MC1E+&n{`>SSFeTB=| zrl7!cR~{5sJowv?pY5k|pmp>rjiO*CYKq`Vj-7Dj>7W?OpDV`<`S&^e*ME}F*i)}Mv<8}n)T+n0r3dM_Z1EC0D zjFcBg@hXN+yto2E{>_C=glAdV9a|9qdc0R?YWaQp%&enS@ zOX}z#a*OcQ6Zfz58e?h#fp>hLoBN2cq@AE$$)X8#0|tLKxEaWGH@}yh`-sk$kKFqa z?Kci+F1cF7N4>Ze1EWT-`QntQdHS9QL zw4clQx*b&cjP)(!tXQ&8#>cm@0&L7CubFt|!5g;i+~v$>W1QH-g0L{G^a;H@zQQAz zi+B}S{&SaAfrs^Ab(2+05G5`mO8MQ*oH3da*A2Vw@$>mV&6hE-7Rz^BtQY8{$%(bg z6YVTyy{S2}FnQTvb+2DadKTQQXEP}xfeBSHT=rEU=W$-wmfFcNtJ&@*KlwgN|J)+@ zmD8^~9Yqfnijm>R>x;8Gw8IZ$QUepv%pJ&fVuP@nKGiJjO)p(^uV>gSMD|4+mn6mz zBn-C5KeZd&-aioh#pf2Y#P>5sIbAJTcaw)-u+eF*QaPVO_%epQ32EUg&f0lNdB=>b zxo({>o5xygZ7-Ne|3Z7@T}dzTOsOcHVgLkCB6IhGcXw_%4_NG!@S0vT$-BL~b&?Tq zt{ut~kjkm%ck_}Z%7h%72uN?HcJ)M1ti$^=;AS-XZ$R;Q&p zqavUppdz3mpdz3mpdz3mpd#=eM_@qIb6Wk)FGYqx_x=OkszXIUMLN(f{xJNfs4yg#J2&f3C2&f3C2&f4B{~$21 LW8T*|N%{I8WCJwh literal 0 HcmV?d00001 diff --git a/tests/test_create_item.py b/tests/test_create_item.py index a1fb15a..91b09d5 100644 --- a/tests/test_create_item.py +++ b/tests/test_create_item.py @@ -356,3 +356,38 @@ def test_json_serialization(): assert item.validate() item_dict = item.to_dict() assert json.dumps(item_dict) + + +def test_stats_unique_values(): + """issue 68 - + ref: https://github.com/developmentseed/rio-stac/issues/68 + """ + src_path = os.path.join( + PREFIX, "S2A_MSIL2A_20220722T105631_N0400_R094_T31TCN_20220722T171159.tif" + ) + with pytest.warns(UserWarning): + item = create_stac_item(src_path, with_raster=True) + assert item.validate() + item_dict = item.to_dict() + assert "raster:bands" in item_dict["assets"]["asset"] + stats = item_dict["assets"]["asset"]["raster:bands"][9]["histogram"] + assert len(stats["buckets"]) == 3 + + +def test_create_item_raster_custom_histogram(): + """Should return a valid item with raster properties.""" + src_path = os.path.join(PREFIX, "dataset_cog.tif") + item = create_stac_item( + src_path, + input_datetime=input_date, + with_raster=True, + raster_max_size=128, + histogram_bins=5, + histogram_range=[0, 10], + ) + assert item.validate() + item_dict = item.to_dict() + stats = item_dict["assets"]["asset"]["raster:bands"][0]["histogram"] + assert len(stats["buckets"]) == 5 + assert stats["max"] == 10 + assert stats["min"] == 0 From 695eb072241506cddb329cdfb1a57f6e62a91833 Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Mon, 8 Sep 2025 14:17:52 +0200 Subject: [PATCH 2/4] allow test to fail for old numpy version --- tests/test_create_item.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_create_item.py b/tests/test_create_item.py index 91b09d5..b0d05fb 100644 --- a/tests/test_create_item.py +++ b/tests/test_create_item.py @@ -3,6 +3,7 @@ import datetime import json import os +import sys import pystac import pytest @@ -358,6 +359,7 @@ def test_json_serialization(): assert json.dumps(item_dict) +@pytest.mark.xfail(sys.version_info < (3, 10), reason="Old numpy do not raise error") def test_stats_unique_values(): """issue 68 - ref: https://github.com/developmentseed/rio-stac/issues/68 From e215c92e96a0340024c3ae6b6bac189e93a0687a Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Mon, 8 Sep 2025 15:23:34 +0200 Subject: [PATCH 3/4] do not silent other ValueError --- rio_stac/stac.py | 17 ++++++++++------- tests/test_create_item.py | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/rio_stac/stac.py b/rio_stac/stac.py index 1728dd9..91dbd55 100644 --- a/rio_stac/stac.py +++ b/rio_stac/stac.py @@ -180,13 +180,16 @@ def _get_stats( try: sample, edges = numpy.histogram(arr[~arr.mask], bins=bins, range=range) - except ValueError: - _, counts = numpy.unique(arr[~arr.mask], return_counts=True) - warnings.warn( - f"Could not calculate the histogram, fall back to automatic bin={len(counts) + 1}.", - UserWarning, - ) - sample, edges = numpy.histogram(arr[~arr.mask], bins=len(counts) + 1) + except ValueError as e: + if "Too many bins for data range." in str(e): + _, counts = numpy.unique(arr[~arr.mask], return_counts=True) + warnings.warn( + f"Could not calculate the histogram, fall back to automatic bin={len(counts) + 1}.", + UserWarning, + ) + sample, edges = numpy.histogram(arr[~arr.mask], bins=len(counts) + 1) + else: + raise e stats["histogram"] = { "count": len(edges), diff --git a/tests/test_create_item.py b/tests/test_create_item.py index b0d05fb..4f2f495 100644 --- a/tests/test_create_item.py +++ b/tests/test_create_item.py @@ -4,6 +4,7 @@ import json import os import sys +import warnings import pystac import pytest @@ -375,6 +376,19 @@ def test_stats_unique_values(): stats = item_dict["assets"]["asset"]["raster:bands"][9]["histogram"] assert len(stats["buckets"]) == 3 + # Should not raise warnings when bins is good + with warnings.catch_warnings(): + warnings.simplefilter("error") + item = create_stac_item(src_path, with_raster=True, histogram_bins=3) + + with pytest.raises(ValueError): + create_stac_item( + src_path, with_raster=True, histogram_bins=3, histogram_range=(0, -1) + ) + + with pytest.raises(ValueError): + create_stac_item(src_path, with_raster=True, histogram_range=(0, -1)) + def test_create_item_raster_custom_histogram(): """Should return a valid item with raster properties.""" From 3bc7ba27abcf2235eb974cf8e7c0b2920a846f39 Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Tue, 9 Sep 2025 10:03:56 +0200 Subject: [PATCH 4/4] update changelog --- CHANGES.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 2ae0554..f9d7046 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,10 @@ +## 0.11.2 (2025-09-09) + +* fix statistics for Raster with Nan/Inf values and missing Nodata +* add `histogram_bins` and `histogram_range` options for histogram configuration +* add fallback for histogram calculation when data is homogenous + ## 0.11.1 (2025-08-28) * avoid serialization issue for statistics