Skip to content

Commit 502cb7a

Browse files
committed
Add jnb_open_link to workflow OGC API record
1 parent a6b8eec commit 502cb7a

File tree

3 files changed

+111
-30
lines changed

3 files changed

+111
-30
lines changed

deep_code/constants.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,10 @@
3232
DEEPESDL_GIT_PULL_BASE = (
3333
"https://deep.earthsystemdatalab.net/hub/user-redirect/git-pull"
3434
)
35+
APPLICATION_TYPE_JUPYTER_SPEC = (
36+
"https://raw.githubusercontent.com/EOEPCA/metadata"
37+
"-profile/refs/heads/1.0/schemas/application-type-jupyter-notebook"
38+
)
39+
APPLICATION_STAC_EXTENSION_SPEC = (
40+
"https://stac-extensions.github.io/application/v0.1.0/schema.json"
41+
)

deep_code/tools/publish.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ def publish_dataset(self, write_to_file: bool = False):
219219
osc_region = self.dataset_config.get("osc_region")
220220
osc_themes = self.dataset_config.get("osc_themes")
221221
cf_params = self.dataset_config.get("cf_parameter")
222+
license_type = self.dataset_config.get("license_type")
222223

223224
if not dataset_id or not self.collection_id:
224225
raise ValueError("Dataset ID or Collection ID missing in the config.")
@@ -230,6 +231,7 @@ def publish_dataset(self, write_to_file: bool = False):
230231
collection_id=self.collection_id,
231232
workflow_id=self.workflow_id,
232233
workflow_title=self.workflow_title,
234+
license_type=license_type,
233235
documentation_link=documentation_link,
234236
access_link=access_link,
235237
osc_status=dataset_status,
@@ -314,7 +316,7 @@ def _update_base_catalog(
314316

315317
return base_catalog
316318

317-
def publish_workflow_experiment(self, write_to_file: bool = False):
319+
def generate_workflow_experiment_records(self, write_to_file: bool = False):
318320
"""prepare workflow and experiment as ogc api record to publish it to the
319321
specified GitHub repository."""
320322
workflow_id = self._normalize_name(self.workflow_config.get("workflow_id"))
@@ -339,12 +341,16 @@ def publish_workflow_experiment(self, write_to_file: bool = False):
339341
application_link = link_builder.build_link_to_jnb(
340342
self.workflow_title, jupyter_notebook_url
341343
)
344+
jnb_open_link = link_builder.make_related_link_for_opening_jnb_from_github(
345+
jupyter_notebook_url=jupyter_notebook_url
346+
)
347+
342348
workflow_record = WorkflowAsOgcRecord(
343349
id=workflow_id,
344350
type="Feature",
345351
title=self.workflow_title,
346352
properties=wf_record_properties,
347-
links=links + theme_links + application_link,
353+
links=links + theme_links + application_link + jnb_open_link,
348354
jupyter_notebook_url=jupyter_notebook_url,
349355
themes=osc_themes,
350356
)
@@ -354,6 +360,7 @@ def publish_workflow_experiment(self, write_to_file: bool = False):
354360
del workflow_dict["jupyter_notebook_url"]
355361
if "osc_workflow" in workflow_dict["properties"]:
356362
del workflow_dict["properties"]["osc_workflow"]
363+
# add workflow record to file_dict
357364
wf_file_path = f"workflows/{workflow_id}/record.json"
358365
file_dict = {wf_file_path: workflow_dict}
359366

@@ -380,6 +387,7 @@ def publish_workflow_experiment(self, write_to_file: bool = False):
380387
del experiment_dict["collection_id"]
381388
if "osc:project" in experiment_dict["properties"]:
382389
del experiment_dict["properties"]["osc:project"]
390+
# add experiment record to file_dict
383391
exp_file_path = f"experiments/{workflow_id}/record.json"
384392
file_dict[exp_file_path] = experiment_dict
385393

@@ -406,7 +414,9 @@ def publish_all(self, write_to_file: bool = False):
406414
"""Publish both dataset and workflow/experiment in a single PR."""
407415
# Get file dictionaries from both methods
408416
dataset_files = self.publish_dataset(write_to_file=write_to_file)
409-
workflow_files = self.publish_workflow_experiment(write_to_file=write_to_file)
417+
workflow_files = self.generate_workflow_experiment_records(
418+
write_to_file=write_to_file
419+
)
410420

411421
# Combine the file dictionaries
412422
combined_files = {**dataset_files, **workflow_files}

deep_code/utils/ogc_api_record.py

Lines changed: 91 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
from typing import Any, Optional
2-
from urllib.parse import quote, urlencode
1+
from typing import Any, Optional, Tuple
2+
from urllib.parse import quote, urlencode, urlparse
33

44
from xrlint.util.constructible import MappingConstructible
55
from xrlint.util.serializable import JsonSerializable, JsonValue
66

77
from deep_code.constants import (
8+
APPLICATION_STAC_EXTENSION_SPEC,
9+
APPLICATION_TYPE_JUPYTER_SPEC,
810
BASE_URL_OSC,
911
DEEPESDL_GIT_PULL_BASE,
1012
OGC_API_RECORD_SPEC,
@@ -141,36 +143,80 @@ def build_link_to_jnb(self, workflow_title, jupyter_nb_url):
141143
}
142144
]
143145

144-
def build_deepesdl_notebook_href(
145-
repo_url: str,
146-
notebook_path: str,
147-
branch: str = "main",
@staticmethod
def _parse_github_notebook_url(url: str) -> Tuple[str, str, str, str]:
    """Split a GitHub notebook URL into repository and file components.

    Supported URL shapes:
      - https://github.com/<owner>/<repo>/blob/<branch>/<path/to/notebook.ipynb>
      - https://github.com/<owner>/<repo>/tree/<branch>/<path>
      - https://raw.githubusercontent.com/<owner>/<repo>/<branch>/<path/to/notebook.ipynb>

    The branch is read as a single path segment, so a branch name that
    itself contains "/" is not recognised as a whole; only its first
    segment ends up in ``branch`` and the rest in the file path.

    Args:
        url: Full URL of the notebook.

    Returns:
        ``(repo_url, repo_name, branch, file_path_in_repo)``, where
        ``repo_url`` is always ``https://github.com/<owner>/<repo>``.

    Raises:
        ValueError: If the host is not one of the two supported GitHub
            hosts, or the path does not have the expected shape (too few
            segments, wrong marker segment, or an empty owner/repo/branch).
    """
    parsed = urlparse(url)
    # ``hostname`` is lower-cased and excludes any port, so the comparison
    # below is case-insensitive, as host names are.
    host = parsed.hostname or ""
    parts = parsed.path.strip("/").split("/")

    if host == "github.com":
        if (
            len(parts) < 5
            or parts[2] not in ("blob", "tree")
            or not all(parts[:4])
        ):
            raise ValueError(f"Unexpected GitHub URL format: {url}")
        owner, repo, _blob_or_tree, branch = parts[:4]
        file_path = "/".join(parts[4:])
    elif host == "raw.githubusercontent.com":
        # Raw URLs carry no "blob"/"tree" marker: owner/repo/branch/path.
        if len(parts) < 4 or not all(parts[:3]):
            raise ValueError(f"Unexpected raw.githubusercontent URL format: {url}")
        owner, repo, branch = parts[:3]
        file_path = "/".join(parts[3:])
    else:
        raise ValueError(f"Only GitHub URLs are supported: {url}")

    return f"https://github.com/{owner}/{repo}", repo, branch, file_path
180+
def build_deepesdl_notebook_href_from_github(
    self,
    jupyter_notebook_url: str,
    base_redirect: str = DEEPESDL_GIT_PULL_BASE,
    branch_override: str | None = None,
) -> str:
    """Return the DeepESDL git-pull redirect URL for a GitHub notebook URL.

    The result has the form
    ``{base}?repo=<repo_url>&urlpath=lab/tree/<repo_name>/<path>&branch=<branch>``
    with every query value percent-encoded via ``urllib.parse.quote``.

    Args:
        jupyter_notebook_url: Full GitHub URL of the notebook; it is split
            by ``_parse_github_notebook_url``.
        base_redirect: Base of the redirect URL the query string is
            appended to.
        branch_override: If truthy, used as the branch instead of the one
            parsed from the URL.
    """
    repo_url, repo_name, parsed_branch, file_path = (
        self._parse_github_notebook_url(jupyter_notebook_url)
    )
    query = urlencode(
        {
            "repo": repo_url,
            "urlpath": f"lab/tree/{repo_name}/{file_path}",
            # An empty or None override falls back to the parsed branch.
            "branch": branch_override or parsed_branch,
        },
        quote_via=quote,
    )
    return f"{base_redirect}?{query}"
160203

161-
def make_related_link_for_opening_jnb(
204+
def make_related_link_for_opening_jnb_from_github(
162205
self,
163-
repo_url: str,
164-
notebook_path: str,
165-
branch: str = "main",
206+
jupyter_notebook_url: str,
166207
title: str = "Open notebook on the DeepESDL platform",
208+
branch_override: str | None = None,
167209
) -> dict[str, str]:
168-
return {
169-
"rel": "related",
170-
"href": self.build_deepesdl_notebook_href(repo_url, notebook_path, branch),
171-
"type": "text/html",
172-
"title": title,
173-
}
210+
return [
211+
{
212+
"rel": "related",
213+
"href": self.build_deepesdl_notebook_href_from_github(
214+
jupyter_notebook_url, branch_override=branch_override
215+
),
216+
"type": "text/html",
217+
"title": title,
218+
}
219+
]
174220

175221

176222
class WorkflowAsOgcRecord(MappingConstructible["OgcRecord"], JsonSerializable):
@@ -188,7 +234,11 @@ def __init__(
188234
themes: Optional[Any] = None,
189235
):
190236
if conformsTo is None:
191-
conformsTo = [OGC_API_RECORD_SPEC]
237+
conformsTo = [
238+
OGC_API_RECORD_SPEC,
239+
APPLICATION_TYPE_JUPYTER_SPEC,
240+
APPLICATION_STAC_EXTENSION_SPEC,
241+
]
192242
self.id = id
193243
self.type = type
194244
self.title = title
@@ -227,6 +277,14 @@ def _generate_static_links(self):
227277
"title": "Jupyter Notebook",
228278
"href": f"{self.jupyter_notebook_url}",
229279
},
280+
{
281+
"rel": "application-originating-platform",
282+
"title": "DeepESDL platform",
283+
"href": "https://deep.earthsystemdatalab.net/",
284+
"type": "text/html",
285+
"application:platform_supports": ["jupyter-notebook"],
286+
"application:preferred_app": "JupyterLab",
287+
},
230288
{
231289
"rel": "related",
232290
"href": f"../../projects/{PROJECT_COLLECTION_NAME}/collection.json",
@@ -258,7 +316,11 @@ def __init__(
258316
if linkTemplates is None:
259317
linkTemplates = []
260318
if conformsTo is None:
261-
conformsTo = [OGC_API_RECORD_SPEC]
319+
conformsTo = [
320+
OGC_API_RECORD_SPEC,
321+
APPLICATION_TYPE_JUPYTER_SPEC,
322+
APPLICATION_STAC_EXTENSION_SPEC,
323+
]
262324
self.id = id
263325
self.title = title
264326
self.type = type
@@ -291,18 +353,20 @@ def _generate_static_links(self):
291353
"type": "application/json",
292354
"title": f"Workflow: {self.title}",
293355
},
294-
# {
295-
# "rel": "child",
296-
# "href": f"../../products/{self.collection_id}/collection.json",
297-
# "type": "application/json",
298-
# "title": f"{self.collection_id}",
299-
# },
300356
{
301357
"rel": "related",
302358
"href": f"../../projects/{PROJECT_COLLECTION_NAME}/collection.json",
303359
"type": "application/json",
304360
"title": "Project: DeepESDL",
305361
},
362+
{
363+
"rel": "application-originating-platform",
364+
"title": "DeepESDL platform",
365+
"href": "https://deep.earthsystemdatalab.net/",
366+
"type": "text/html",
367+
"application:platform_supports": ["jupyter-notebook"],
368+
"application:preferred_app": "JupyterLab",
369+
},
306370
{
307371
"rel": "input",
308372
"href": "./input.yaml",

0 commit comments

Comments
 (0)