-
Notifications
You must be signed in to change notification settings - Fork 12
Open
Labels
benchmark-failure — integration test failed
Description
Benchmark scenario ID: worldcereal_crop_type
Benchmark scenario definition: https://github.com/ESA-APEx/apex_algorithms/blob/4ea4f5aabfc75d331e13773577a37c5a2f272aaa/algorithm_catalog/vito/worldcereal_crop_type/benchmark_scenarios/worldcereal_crop_type.json
openEO backend: openeofed.dataspace.copernicus.eu
GitHub Actions workflow run: https://github.com/ESA-APEx/apex_algorithms/actions/runs/23004996669
Workflow artifacts: https://github.com/ESA-APEx/apex_algorithms/actions/runs/23004996669#artifacts
Test start: 2026-03-12 13:44:27.871192+00:00
Test duration: 0:12:13.069710
Test outcome: ❌ failed
Last successful test phase: download-reference
Failure in test phase: compare:derived_from-change
Contact Information
| Name | Organization | Contact |
|---|---|---|
| Kristof Van Tricht | VITO | Contact via VITO (VITO Website, GitHub) |
Process Graph
{
"worldcerealcropextent1": {
"process_id": "worldcereal_crop_type",
"namespace": "https://worldcereal.github.io/worldcereal-classification/udp/worldcereal_crop_type.json",
"arguments": {
"spatial_extent": {
"west": 622694.5968575787,
"east": 623079.000934101,
"south": 5672232.857114074,
"north": 5672519.995940826,
"crs": "EPSG:32631",
"srs": "EPSG:32631"
},
"temporal_extent": [
"2018-05-01",
"2019-04-30"
],
"model_url": "https://s3.waw3-1.cloudferro.com/swift/v1/APEx-benchmarks/worldcereal_crop_type/test_worldcereal_crop_type_custommodel.onnx"
},
"result": true
}
}
Error Logs
scenario = BenchmarkScenario(id='worldcereal_crop_type', description='WorldCereal crop type benchmark', backend='openeofed.datasp...gorithms/apex_algorithms/algorithm_catalog/vito/worldcereal_crop_type/benchmark_scenarios/worldcereal_crop_type.json'))
connection_factory = <function connection_factory.<locals>.get_connection at 0x7fe051595d00>
tmp_path = PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_worldcereal0')
track_metric = <function track_metric.<locals>.track at 0x7fe051595e40>
track_phase = <function track_phase.<locals>.track at 0x7fe051595f80>
upload_assets_on_fail = <function upload_assets_on_fail.<locals>.collect at 0x7fe051596020>
request = <FixtureRequest for <Function test_run_benchmark[worldcereal_crop_type]>>
@pytest.mark.parametrize(
"scenario",
[
# Use scenario id as parameterization id to give nicer test names.
pytest.param(uc, id=uc.id)
for uc in get_benchmark_scenarios()
],
)
def test_run_benchmark(
scenario: BenchmarkScenario,
connection_factory,
tmp_path: Path,
track_metric,
track_phase,
upload_assets_on_fail,
request,
):
track_metric("scenario_id", scenario.id)
with track_phase(phase="connect"):
# Check if a backend override has been provided via cli options.
override_backend = request.config.getoption("--override-backend")
backend_filter = request.config.getoption("--backend-filter")
if backend_filter and not re.match(backend_filter, scenario.backend):
# TODO apply filter during scenario retrieval, but seems to be hard to retrieve cli param
pytest.skip(
f"skipping scenario {scenario.id} because backend {scenario.backend} does not match filter {backend_filter!r}"
)
backend = scenario.backend
if override_backend:
_log.info(f"Overriding backend URL with {override_backend!r}")
backend = override_backend
connection: openeo.Connection = connection_factory(url=backend)
report_path = None
with track_phase(phase="create-job"):
# TODO #14 scenario option to use synchronous instead of batch job mode?
job = connection.create_job(
process_graph=scenario.process_graph,
title=f"APEx benchmark {scenario.id}",
additional=scenario.job_options,
)
track_metric("job_id", job.job_id)
if request.config.getoption("--upload-benchmark-report"):
report_path = tmp_path / "benchmark_report.json"
report_path.write_text(json.dumps({
"job_id": job.job_id,
"scenario_id": scenario.id,
"scenario_description": scenario.description,
"scenario_backend": scenario.backend,
"scenario_source": str(scenario.source) if scenario.source else None,
"reference_data": scenario.reference_data,
"reference_options": scenario.reference_options,
}, indent=2))
upload_assets_on_fail(report_path)
with track_phase(phase="run-job"):
# TODO: monitor timing and progress
# TODO: separate "job started" and run phases?
max_minutes = request.config.getoption("--maximum-job-time-in-minutes")
if max_minutes:
def _timeout_handler(signum, frame):
raise TimeoutError(
f"Batch job {job.job_id} exceeded maximum allowed time of {max_minutes} minutes"
)
old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
signal.alarm(max_minutes * 60)
try:
job.start_and_wait()
finally:
if max_minutes:
signal.alarm(0)
signal.signal(signal.SIGALRM, old_handler)
with track_phase(phase="collect-metadata"):
collect_metrics_from_job_metadata(job, track_metric=track_metric)
results = job.get_results()
collect_metrics_from_results_metadata(results, track_metric=track_metric)
with track_phase(phase="download-actual"):
# Download actual results
actual_dir = tmp_path / "actual"
paths = results.download_files(target=actual_dir, include_stac_metadata=True)
# Upload assets on failure
upload_assets_on_fail(*paths)
with track_phase(phase="download-reference"):
reference_dir = download_reference_data(
scenario=scenario, reference_dir=tmp_path / "reference"
)
if report_path is not None:
report = json.loads(report_path.read_text())
report["actual_files"] = {
str(p.relative_to(actual_dir)): f"{p.stat().st_size / 1024:.1f} kb"
for p in sorted(actual_dir.rglob("*")) if p.is_file()
}
ref_files = {}
for p in sorted(reference_dir.rglob("*")):
if not p.is_file():
continue
rel = p.relative_to(reference_dir)
size_str = f"{p.stat().st_size / 1024:.1f} kb"
actual_counterpart = actual_dir / rel
if not actual_counterpart.exists():
size_str += " (missing in actual)"
elif actual_counterpart.stat().st_size != p.stat().st_size:
size_str += f" (actual: {actual_counterpart.stat().st_size / 1024:.1f} kb)"
ref_files[str(rel)] = size_str
report["reference_files"] = ref_files
report_path.write_text(json.dumps(report, indent=2))
with track_phase(
phase="compare", describe_exception=analyse_results_comparison_exception
):
# Compare actual results with reference data
> assert_job_results_allclose(
actual=actual_dir,
expected=reference_dir,
tmp_path=tmp_path,
rtol=scenario.reference_options.get("rtol", 1e-3),
atol=scenario.reference_options.get("atol", 1),
pixel_tolerance=scenario.reference_options.get("pixel_tolerance", 1),
)
tests/test_benchmarks.py:146:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
actual = PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_worldcereal0/actual')
expected = PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_worldcereal0/reference')
def assert_job_results_allclose(
actual: Union[BatchJob, JobResults, str, Path],
expected: Union[BatchJob, JobResults, str, Path],
*,
rtol: float = _DEFAULT_RTOL,
atol: float = _DEFAULT_ATOL,
pixel_tolerance: float = _DEFAULT_PIXELTOL,
tmp_path: Optional[Path] = None,
):
"""
Assert that two job results sets are equal (with tolerance).
:param actual: actual job results, provided as :py:class:`~openeo.rest.job.BatchJob` object,
:py:meth:`~openeo.rest.job.JobResults` object or path to directory with downloaded assets.
:param expected: expected job results, provided as :py:class:`~openeo.rest.job.BatchJob` object,
:py:meth:`~openeo.rest.job.JobResults` object or path to directory with downloaded assets.
:param rtol: relative tolerance
:param atol: absolute tolerance
:param pixel_tolerance: maximum fraction of pixels (in percent)
that is allowed to be significantly different (considering ``atol`` and ``rtol``)
:param tmp_path: root temp path to download results if needed.
It's recommended to pass pytest's `tmp_path` fixture here
:raises AssertionError: if not equal within the given tolerance
.. versionadded:: 0.31.0
.. warning::
This function is experimental and subject to change.
"""
issues = _compare_job_results(
actual, expected, rtol=rtol, atol=atol, pixel_tolerance=pixel_tolerance, tmp_path=tmp_path
)
if issues:
> raise AssertionError("\n".join(issues))
E AssertionError: Issues for metadata file 'job-results.json':
E Differing 'derived_from' links (217 common, 102 only in actual, 100 only in expected):
E only in actual: {'S2A_MSIL2A_20180501T105031_N0500_R051_T31UFS_20230831T054525', '/eodata/Sentinel-1/SAR/IW_GRDH_1S-COG/2018/08/30/S1B_IW_GRDH_1SDV_20180830T054945_20180830T055010_012488_01707F_CADF_COG.SAFE', 'S2A_MSIL2A_20180717T104021_N0500_R008_T31UFS_20230822T211203', 'S2B_MSIL2A_20180715T105029_N0500_R051_T31UFS_20230821T205551', 'S2B_MSIL2A_20190421T105039_N0500_R051_T31UFS_20221027T231827', 'S2A_MSIL2A_20190215T105131_N0500_R051_T31UFS_20221204T114401', 'S2B_MSIL2A_20190418T104029_N0500_R008_T31UFS_20221027T005906', 'S2B_MSIL2A_20180722T104019_N0500_R008_T31UFS_20230817T052733', 'S2A_MSIL2A_20181117T105321_N0500_R051_T31UFS_20230704T160613', 'S2B_MSIL2A_20181020T104049_N0500_R008_T31UFS_20230818T105637', 'S2B_MSIL2A_20181119T104329_N0500_R008_T31UFS_20230709T100352', 'S2A_MSIL2A_20180610T105031_N0500_R051_T31UFS_20230715T224609', 'S2B_MSIL2A_20181219T104439_N0500_R008_T31UFS_20230727T082957', 'S2B_MSIL2A_20181003T105019_N0500_R051_T31UFS_20230809T235449', 'S2A_MSIL2A_20180720T105031_N0500_R...
E only in expected: {'/eodata/Sentinel-2/MSI/L2A_N0500/2018/05/16/S2B_MSIL2A_20180516T105029_N0500_R051_T31UFS_20230902T203744.SAFE', '/eodata/Sentinel-2/MSI/L2A_N0500/2018/08/31/S2B_MSIL2A_20180831T104019_N0500_R008_T31UFS_20230712T215106.SAFE', '/eodata/auxdata/CopDEM/COP-DEM_GLO-30-DGED/DEM1_SAR_DGE_30_20110322T172627_20130110T172720_ADS_000000_CFsl.DEM', '/eodata/Sentinel-2/MSI/L2A_N0500/2019/04/11/S2B_MSIL2A_20190411T105029_N0500_R051_T31UFS_20221020T002157.SAFE', '/eodata/Sentinel-2/MSI/L2A_N0500/2019/03/17/S2A_MSIL2A_20190317T105021_N0500_R051_T31UFS_20231116T023124.SAFE', '/eodata/Sentinel-2/MSI/L2A_N0500/2018/05/06/S2B_MSIL2A_20180506T105029_N0500_R051_T31UFS_20231015T015113.SAFE', '/eodata/Sentinel-2/MSI/L2A_N0500/2018/12/22/S2B_MSIL2A_20181222T105449_N0500_R051_T31UFS_20230617T002646.SAFE', '/eodata/Sentinel-2/MSI/L2A_N0500/2018/07/12/S2B_MSIL2A_20180712T104019_N0500_R008_T31UFS_20230714T031225.SAFE', '/eodata/Sentinel-2/MSI/L2A_N0500/2018/05/08/S2A_MSIL2A_20180508T104031_N0500_R008_T31UFS_2....
/opt/hostedtoolcache/Python/3.12.12/x64/lib/python3.12/site-packages/openeo/testing/results.py:521: AssertionError
----------------------------- Captured stdout call -----------------------------
0:00:00 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': send 'start'
0:00:20 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': created (progress 0%)
0:00:26 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': created (progress 0%)
0:00:32 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': created (progress 0%)
0:00:42 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': created (progress 0%)
0:00:52 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': created (progress 0%)
0:01:05 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:01:20 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:01:40 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:02:04 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:02:34 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:03:12 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:03:59 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:04:57 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:05:58 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:06:58 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:07:58 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:08:59 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:09:59 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:11:00 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': running (progress N/A)
0:12:00 Job 'cdse-j-2603121344324ae4bb78ba96bc41ed87': finished (progress 100%)
----------------------------- Captured stderr call -----------------------------
Warning 1: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.
Warning 1: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.
------------------------------ Captured log call -------------------------------
INFO conftest:conftest.py:145 Connecting to 'openeofed.dataspace.copernicus.eu'
INFO openeo.config:config.py:193 Loaded openEO client config from sources: []
INFO conftest:conftest.py:158 Checking for auth_env_var='OPENEO_AUTH_CLIENT_CREDENTIALS_CDSEFED' to drive auth against url='openeofed.dataspace.copernicus.eu'.
INFO conftest:conftest.py:162 Extracted provider_id='CDSE' client_id='openeo-apex-benchmarks-service-account' from auth_env_var='OPENEO_AUTH_CLIENT_CREDENTIALS_CDSEFED'
INFO openeo.rest.connection:connection.py:302 Found OIDC providers: ['CDSE']
INFO openeo.rest.auth.oidc:oidc.py:404 Doing 'client_credentials' token request 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token' with post data fields ['grant_type', 'client_id', 'client_secret', 'scope'] (client_id 'openeo-apex-benchmarks-service-account')
INFO openeo.rest.connection:connection.py:401 Obtained tokens: ['access_token', 'id_token']
INFO openeo.rest.job:job.py:436 Downloading Job result asset 'worldcereal_crop_type_postprocessed.tif' from https://s3.waw3-1.openeo.v1.dataspace.copernicus.eu/openeo-data-prod-waw4-1/batch_jobs/j-2603121344324ae4bb78ba96bc41ed87/worldcereal_crop_type_postprocessed.tif?X-Proxy-Head-As-Get=true&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=fb2bee26bab8450685634a9de9c91904%2F20260312%2Fwaw4-1%2Fs3%2Faws4_request&X-Amz-Date=20260312T135635Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Security-Token=eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlX2FybiI6ImFybjpvcGVuZW93czppYW06Ojpyb2xlL29wZW5lby1kYXRhLXByb2Qtd2F3NC0xLXdvcmtzcGFjZSIsImluaXRpYWxfaXNzdWVyIjoib3BlbmVvLnByb2Qud2F3My0xLm9wZW5lby1pbnQudjEuZGF0YXNwYWNlLmNvcGVybmljdXMuZXUiLCJodHRwczovL2F3cy5hbWF6b24uY29tL3RhZ3MiOnsicHJpbmNpcGFsX3RhZ3MiOnsiam9iX2lkIjpbImotMjYwMzEyMTM0NDMyNGFlNGJiNzhiYTk2YmM0MWVkODciXSwidXNlcl9pZCI6WyI2YTc3ZmNkMS05YzA4LTQ2ZTktYjg3NS01NGZiOTk5YWIyMDAiXX0sInRyYW5zaXRpdmVfdGFnX2tleXMiOlsidXNlcl9pZCIsImpvYl9pZCJdfSwiaXNzIjoic3RzLndhdzMtMS5vcGVuZW8udjEuZGF0YXNwYWNlLmNvcGVybmljdXMuZXUiLCJzdWIiOiJvcGVuZW8tZHJpdmVyIiwiZXhwIjoxNzczMzY2OTk1LCJuYmYiOjE3NzMzMjM3OTUsImlhdCI6MTc3MzMyMzc5NSwianRpIjoiZjg5MWE0ZmEtNGE1ZS00OWM3LWE3NTMtMTc2NWVhMDk0YTVkIiwiYWNjZXNzX2tleV9pZCI6ImZiMmJlZTI2YmFiODQ1MDY4NTYzNGE5ZGU5YzkxOTA0In0.qZkeO897XdvslgVytXXQgeZ4wEGV4eUm2ypbxZmvkNev4BwU5myOgZenARFJEAnsDw_DzT1OtNIUtRNAiyV7QSIEduKiqCaadJwwbRabkmLvayRZIzICuoTZqnO_toufBOvo9szYBrz1MO8zrnzsOx0AbWzPhPh83fqvBVg-SkAWmNOhWftg9cZsAJmYeXEVt5hEGY7_Iq7YD22QHGF7oMxUprbpurf3p0y3F2h8UK4Ff-tFFk0vDYe4BJgZTYq5FP9hx7AHoJb8zYAOplD68dsIvzJzDrBkEjuMxN0RUp-RhITUFQroqtRGH_mxZDybHppyngIXLBqjWup98mfhBA&X-Amz-Signature=74f386589a110e27e6d59b44385c95dcfe4f1b47b62435bf6d976cef6eb3ccb8 to /home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_worldcereal0/actual/worldcereal_crop_type_postprocessed.tif
INFO apex_algorithm_qa_tools.scenarios:util.py:345 Downloading reference data for scenario.id='worldcereal_crop_type' to reference_dir=PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_worldcereal0/reference'): start 2026-03-12 13:56:38.752462
INFO apex_algorithm_qa_tools.scenarios:util.py:345 Downloading source='https://s3.waw3-1.cloudferro.com/apex-benchmarks/gh-22353750758!tests_test_benchmarks.py__test_run_benchmark_worldcereal_crop_type_!actual/worldcereal_crop_type_postprocessed.tif' to path=PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_worldcereal0/reference/worldcereal_crop_type_postprocessed.tif'): start 2026-03-12 13:56:38.752769
INFO apex_algorithm_qa_tools.scenarios:util.py:351 Downloading source='https://s3.waw3-1.cloudferro.com/apex-benchmarks/gh-22353750758!tests_test_benchmarks.py__test_run_benchmark_worldcereal_crop_type_!actual/worldcereal_crop_type_postprocessed.tif' to path=PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_worldcereal0/reference/worldcereal_crop_type_postprocessed.tif'): end 2026-03-12 13:56:39.552176, elapsed 0:00:00.799407
INFO apex_algorithm_qa_tools.scenarios:util.py:345 Downloading source='https://s3.waw3-1.cloudferro.com/apex-benchmarks/gh-22353750758!tests_test_benchmarks.py__test_run_benchmark_worldcereal_crop_type_!actual/job-results.json' to path=PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_worldcereal0/reference/job-results.json'): start 2026-03-12 13:56:39.552473
INFO apex_algorithm_qa_tools.scenarios:util.py:351 Downloading source='https://s3.waw3-1.cloudferro.com/apex-benchmarks/gh-22353750758!tests_test_benchmarks.py__test_run_benchmark_worldcereal_crop_type_!actual/job-results.json' to path=PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_worldcereal0/reference/job-results.json'): end 2026-03-12 13:56:40.851481, elapsed 0:00:01.299008
INFO apex_algorithm_qa_tools.scenarios:util.py:351 Downloading reference data for scenario.id='worldcereal_crop_type' to reference_dir=PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_worldcereal0/reference'): end 2026-03-12 13:56:40.851633, elapsed 0:00:02.099171
INFO openeo.testing.results:results.py:422 Comparing job results: PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_worldcereal0/actual') vs PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_worldcereal0/reference')
WARNING rasterio._env:__init__.py:367 CPLE_AppDefined in worldcereal_crop_type_postprocessed.tif: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.
WARNING rasterio._env:__init__.py:367 CPLE_AppDefined in worldcereal_crop_type_postprocessed.tif: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
benchmark-failure — integration test failed