-
Notifications
You must be signed in to change notification settings - Fork 12
Open
Labels
benchmark-failure — integration test failed
Description
Benchmark scenario ID: forest_fire_mapping
Benchmark scenario definition: https://github.com/ESA-APEx/apex_algorithms/blob/91426ff33c094b53df24495e4582d2f8937475ce/algorithm_catalog/vito/random_forest_firemapping/benchmark_scenarios/random_forest_firemapping.json
openEO backend: openeo.vito.be
GitHub Actions workflow run: https://github.com/ESA-APEx/apex_algorithms/actions/runs/22856487381
Workflow artifacts: https://github.com/ESA-APEx/apex_algorithms/actions/runs/22856487381#artifacts
Test start: 2026-03-09 13:47:07.978254+00:00
Test duration: 0:03:59.805207
Test outcome: ❌ failed
Last successful test phase: create-job
Failure in test phase: run-job
Contact Information
| Name | Organization | Contact |
|---|---|---|
| Pratichhya Sharma | VITO | Contact via VITO (VITO Website, GitHub) |
Process Graph
{
"randomforestfiremapping1": {
"arguments": {
"padding_window_size": 33,
"spatial_extent": {
"coordinates": [
[
[
-17.996638457335074,
28.771993378019005
],
[
-17.960989271845406,
28.822652746872745
],
[
-17.913144312372435,
28.85454938652139
],
[
-17.842315009623224,
28.83015783855478
],
[
-17.781805207936817,
28.842353612538087
],
[
-17.728331429702315,
28.74103487483061
],
[
-17.766795024572748,
28.681932277834584
],
[
-17.75131577297855,
28.624236885528937
],
[
-17.756944591740076,
28.579206335436727
],
[
-17.838093395552082,
28.451150708612
],
[
-17.871397239891113,
28.480702007110015
],
[
-17.88969090086607,
28.57404658490533
],
[
-17.957705794234517,
28.658947934558352
],
[
-18.003674480786984,
28.76167387695621
],
[
-18.003674480786984,
28.76167387695621
],
[
-17.996638457335074,
28.771993378019005
]
]
],
"type": "Polygon"
},
"temporal_extent": [
"2023-07-15",
"2023-09-15"
]
},
"namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/main/algorithm_catalog/vito/random_forest_firemapping/openeo_udp/random_forest_firemapping.json",
"process_id": "random_forest_firemapping",
"result": true
}
}
Error Logs
scenario = BenchmarkScenario(id='forest_fire_mapping', description='Forest Fire Mapping using Random Forest based on Sentinel-2 a.../apex_algorithms/algorithm_catalog/vito/random_forest_firemapping/benchmark_scenarios/random_forest_firemapping.json'))
connection_factory = <function connection_factory.<locals>.get_connection at 0x7f9b783e5da0>
tmp_path = PosixPath('/home/runner/work/apex_algorithms/apex_algorithms/qa/benchmarks/tmp_path_root/test_run_benchmark_forest_fire0')
track_metric = <function track_metric.<locals>.track at 0x7f9b783e5ee0>
track_phase = <function track_phase.<locals>.track at 0x7f9b783e6020>
upload_assets_on_fail = <function upload_assets_on_fail.<locals>.collect at 0x7f9b783e60c0>
request = <FixtureRequest for <Function test_run_benchmark[forest_fire_mapping]>>
@pytest.mark.parametrize(
"scenario",
[
# Use scenario id as parameterization id to give nicer test names.
pytest.param(uc, id=uc.id)
for uc in get_benchmark_scenarios()
],
)
def test_run_benchmark(
scenario: BenchmarkScenario,
connection_factory,
tmp_path: Path,
track_metric,
track_phase,
upload_assets_on_fail,
request,
):
track_metric("scenario_id", scenario.id)
with track_phase(phase="connect"):
# Check if a backend override has been provided via cli options.
override_backend = request.config.getoption("--override-backend")
backend_filter = request.config.getoption("--backend-filter")
if backend_filter and not re.match(backend_filter, scenario.backend):
# TODO apply filter during scenario retrieval, but seems to be hard to retrieve cli param
pytest.skip(
f"skipping scenario {scenario.id} because backend {scenario.backend} does not match filter {backend_filter!r}"
)
backend = scenario.backend
if override_backend:
_log.info(f"Overriding backend URL with {override_backend!r}")
backend = override_backend
connection: openeo.Connection = connection_factory(url=backend)
report_path = None
with track_phase(phase="create-job"):
# TODO #14 scenario option to use synchronous instead of batch job mode?
job = connection.create_job(
process_graph=scenario.process_graph,
title=f"APEx benchmark {scenario.id}",
additional=scenario.job_options,
)
track_metric("job_id", job.job_id)
if request.config.getoption("--upload-benchmark-report"):
report_path = tmp_path / "benchmark_report.json"
report_path.write_text(json.dumps({
"job_id": job.job_id,
"scenario_id": scenario.id,
"scenario_description": scenario.description,
"scenario_backend": scenario.backend,
"scenario_source": str(scenario.source) if scenario.source else None,
"reference_data": scenario.reference_data,
"reference_options": scenario.reference_options,
}, indent=2))
upload_assets_on_fail(report_path)
with track_phase(phase="run-job"):
# TODO: monitor timing and progress
# TODO: separate "job started" and run phases?
max_minutes = request.config.getoption("--maximum-job-time-in-minutes")
if max_minutes:
def _timeout_handler(signum, frame):
raise TimeoutError(
f"Batch job {job.job_id} exceeded maximum allowed time of {max_minutes} minutes"
)
old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
signal.alarm(max_minutes * 60)
try:
> job.start_and_wait()
tests/test_benchmarks.py:96:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <BatchJob job_id='j-2603091347114f55b25ef924214f0629'>
def start_and_wait(
self,
*,
print=print,
max_poll_interval: float = DEFAULT_JOB_STATUS_POLL_INTERVAL_MAX,
connection_retry_interval: float = DEFAULT_JOB_STATUS_POLL_CONNECTION_RETRY_INTERVAL,
soft_error_max: int = DEFAULT_JOB_STATUS_POLL_SOFT_ERROR_MAX,
show_error_logs: bool = True,
require_success: bool = True,
) -> BatchJob:
"""
Start the batch job, poll its status and wait till it finishes (or fails)
:param print: print/logging function to show progress/status
:param max_poll_interval: maximum number of seconds to sleep between job status polls
:param connection_retry_interval: how long to wait when status poll failed due to connection issue
:param soft_error_max: maximum number of soft errors (e.g. temporary connection glitches) to allow
:param show_error_logs: whether to automatically print error logs when the batch job failed.
:param require_success: whether to raise an exception if the job did not finish successfully.
:return: Handle to the job created at the backend.
.. versionchanged:: 0.37.0
Added argument ``show_error_logs``.
.. versionchanged:: 0.42.0
All arguments must be specified as keyword arguments,
to eliminate the risk of positional mix-ups between heterogeneous arguments and flags.
.. versionchanged:: 0.42.0
Added argument ``require_success``.
"""
# TODO rename `connection_retry_interval` to something more generic?
start_time = time.time()
def elapsed() -> str:
return str(datetime.timedelta(seconds=time.time() - start_time)).rsplit(".")[0]
def print_status(msg: str):
print("{t} Job {i!r}: {m}".format(t=elapsed(), i=self.job_id, m=msg))
# TODO: make `max_poll_interval`, `connection_retry_interval` class constants or instance properties?
print_status("send 'start'")
self.start()
# TODO: also add `wait` method so you can track a job that already has started explicitly
# or just rename this method to `wait` and automatically do start if not started yet?
# Start with fast polling.
poll_interval = min(5, max_poll_interval)
status = None
_soft_error_count = 0
def soft_error(message: str):
"""Non breaking error (unless we had too much of them)"""
nonlocal _soft_error_count
_soft_error_count += 1
if _soft_error_count > soft_error_max:
raise OpenEoClientException("Excessive soft errors")
print_status(message)
time.sleep(connection_retry_interval)
while True:
# TODO: also allow a hard time limit on this infinite poll loop?
try:
job_info = self.describe()
except requests.ConnectionError as e:
soft_error("Connection error while polling job status: {e}".format(e=e))
continue
except OpenEoApiPlainError as e:
if e.http_status_code in [HTTP_502_BAD_GATEWAY, HTTP_503_SERVICE_UNAVAILABLE]:
soft_error("Service availability error while polling job status: {e}".format(e=e))
continue
else:
raise
status = job_info.get("status", "N/A")
progress = job_info.get("progress")
if isinstance(progress, int):
progress = f"{progress:d}%"
elif isinstance(progress, float):
progress = f"{progress:.1f}%"
else:
progress = "N/A"
print_status(f"{status} (progress {progress})")
if status not in ('submitted', 'created', 'queued', 'running'):
break
# Sleep for next poll (and adaptively make polling less frequent)
time.sleep(poll_interval)
poll_interval = min(1.25 * poll_interval, max_poll_interval)
if require_success and status != "finished":
# TODO: render logs jupyter-aware in a notebook context?
if show_error_logs:
print(f"Your batch job {self.job_id!r} failed. Error logs:")
print(self.logs(level=logging.ERROR))
print(
f"Full logs can be inspected in an openEO (web) editor or with `connection.job({self.job_id!r}).logs()`."
)
> raise JobFailedException(
f"Batch job {self.job_id!r} didn't finish successfully. Status: {status} (after {elapsed()}).",
job=self,
)
E openeo.rest.JobFailedException: Batch job 'j-2603091347114f55b25ef924214f0629' didn't finish successfully. Status: error (after 0:03:56).
/opt/hostedtoolcache/Python/3.12.12/x64/lib/python3.12/site-packages/openeo/rest/job.py:382: JobFailedException
----------------------------- Captured stdout call -----------------------------
0:00:00 Job 'j-2603091347114f55b25ef924214f0629': send 'start'
0:00:54 Job 'j-2603091347114f55b25ef924214f0629': queued (progress 0%)
0:00:59 Job 'j-2603091347114f55b25ef924214f0629': queued (progress 0%)
0:01:06 Job 'j-2603091347114f55b25ef924214f0629': queued (progress 0%)
0:01:16 Job 'j-2603091347114f55b25ef924214f0629': queued (progress 0%)
0:01:26 Job 'j-2603091347114f55b25ef924214f0629': running (progress 6.6%)
0:01:41 Job 'j-2603091347114f55b25ef924214f0629': running (progress 8.7%)
0:01:56 Job 'j-2603091347114f55b25ef924214f0629': running (progress 10.8%)
0:02:15 Job 'j-2603091347114f55b25ef924214f0629': running (progress 13.3%)
0:02:40 Job 'j-2603091347114f55b25ef924214f0629': running (progress 16.2%)
0:03:14 Job 'j-2603091347114f55b25ef924214f0629': running (progress 19.7%)
0:03:52 Job 'j-2603091347114f55b25ef924214f0629': error (progress N/A)
Your batch job 'j-2603091347114f55b25ef924214f0629' failed. Error logs:
[{'id': '[1773064209811, 42284]', 'time': '2026-03-09T13:50:09.811Z', 'level': 'error', 'message': "OpenEO batch job failed: NoDataAvailableException(status_code=400, code='NoDataAvailable', message='There is no data available for the given extents.', id='no-request')"}]
Full logs can be inspected in an openEO (web) editor or with `connection.job('j-2603091347114f55b25ef924214f0629').logs()`.
------------------------------ Captured log call -------------------------------
INFO conftest:conftest.py:145 Connecting to 'openeo.vito.be'
INFO openeo.config:config.py:193 Loaded openEO client config from sources: []
INFO conftest:conftest.py:158 Checking for auth_env_var='OPENEO_AUTH_CLIENT_CREDENTIALS_TERRASCOPE' to drive auth against url='openeo.vito.be'.
INFO conftest:conftest.py:162 Extracted provider_id='terrascope' client_id='openeo-apex-service-account' from auth_env_var='OPENEO_AUTH_CLIENT_CREDENTIALS_TERRASCOPE'
INFO openeo.rest.connection:connection.py:302 Found OIDC providers: ['egi', 'terrascope', 'CDSE']
INFO openeo.rest.auth.oidc:oidc.py:404 Doing 'client_credentials' token request 'https://sso.terrascope.be/auth/realms/terrascope/protocol/openid-connect/token' with post data fields ['grant_type', 'client_id', 'client_secret', 'scope'] (client_id 'openeo-apex-service-account')
INFO openeo.rest.connection:connection.py:401 Obtained tokens: ['access_token', 'id_token']
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
benchmark-failure — integration test failed