From 7dece304d305f46ed2cbed4b6acac2e7bf0f60b9 Mon Sep 17 00:00:00 2001 From: MaximilianSoerenPollak Date: Wed, 11 Mar 2026 20:48:26 +0100 Subject: [PATCH 01/11] WIP: Local Works somewhat --- docs.bzl | 17 +- scripts_bazel/generate_sourcelinks_cli.py | 51 ++++- scripts_bazel/merge_sourcelinks.py | 70 ++++++- .../score_source_code_linker/__init__.py | 184 +++++++++--------- .../score_source_code_linker/metadata.py | 23 +++ .../module_source_links.py | 137 +++++++++++++ .../need_source_links.py | 51 +++++ .../score_source_code_linker/needlinks.py | 114 +++++++++-- src/helper_lib/additional_functions.py | 25 +++ 9 files changed, 555 insertions(+), 117 deletions(-) create mode 100644 src/extensions/score_source_code_linker/metadata.py create mode 100644 src/extensions/score_source_code_linker/module_source_links.py diff --git a/docs.bzl b/docs.bzl index 8f728f123..4f1ec3131 100644 --- a/docs.bzl +++ b/docs.bzl @@ -69,7 +69,7 @@ def _rewrite_needs_json_to_sourcelinks(labels): out.append(s) return out -def _merge_sourcelinks(name, sourcelinks): +def _merge_sourcelinks(name, sourcelinks, known_good = None): """Merge multiple sourcelinks JSON files into a single file. 
Args: @@ -77,15 +77,22 @@ def _merge_sourcelinks(name, sourcelinks): sourcelinks: List of sourcelinks JSON file targets """ + extra_srcs = [] + known_good_arg = "" + if known_good != None: + extra_srcs = [known_good] + known_good_arg = "--known_good $(location %s)" % known_good + native.genrule( name = name, - srcs = sourcelinks, + srcs = sourcelinks + extra_srcs, outs = [name + ".json"], cmd = """ $(location @score_docs_as_code//scripts_bazel:merge_sourcelinks) \ --output $@ \ + {known_good_arg} \ $(SRCS) - """, + """.format(known_good_arg = known_good_arg), tools = ["@score_docs_as_code//scripts_bazel:merge_sourcelinks"], ) @@ -120,7 +127,7 @@ def _missing_requirements(deps): fail(msg) fail("This case should be unreachable?!") -def docs(source_dir = "docs", data = [], deps = [], scan_code = []): +def docs(source_dir = "docs", data = [], deps = [], scan_code = [], known_good = None): """Creates all targets related to documentation. By using this function, you'll get any and all updates for documentation targets in one place. 
@@ -175,7 +182,7 @@ def docs(source_dir = "docs", data = [], deps = [], scan_code = []): data_with_docs_sources = _rewrite_needs_json_to_docs_sources(data) additional_combo_sourcelinks = _rewrite_needs_json_to_sourcelinks(data) - _merge_sourcelinks(name = "merged_sourcelinks", sourcelinks = [":sourcelinks_json"] + additional_combo_sourcelinks) + _merge_sourcelinks(name = "merged_sourcelinks", sourcelinks = [":sourcelinks_json"] + additional_combo_sourcelinks, known_good = known_good) py_binary( name = "docs", diff --git a/scripts_bazel/generate_sourcelinks_cli.py b/scripts_bazel/generate_sourcelinks_cli.py index 4291b97c5..27a0e4508 100644 --- a/scripts_bazel/generate_sourcelinks_cli.py +++ b/scripts_bazel/generate_sourcelinks_cli.py @@ -26,24 +26,51 @@ _extract_references_from_file, # pyright: ignore[reportPrivateUsage] TODO: move it out of the extension and into this script ) from src.extensions.score_source_code_linker.needlinks import ( - store_source_code_links_json, + store_source_code_links_with_metadata_json, ) +from src.extensions.score_source_code_linker.metadata import MetaData logging.basicConfig(level=logging.INFO, format="%(message)s") logger = logging.getLogger(__name__) +def parse_module_name_from_path(path: Path) -> str: + """ + Parse out the Module-Name from the filename gotten + /home/user/.cache/bazel/aksj37981712/external/score_docs_as_code+/src/tests/testfile.py + => score_docs_as_code + """ + + # COMBO BUILD + # If external is in the filepath that gets parsed => + # file is in an external module => combo build + # e.g. 
.../external/score_docs_as_code+/src/helper_lib/__init__.py + + # PATH if we are in local repository + # PosixPath('src/helper_lib/test_helper_lib.py') + # Path if we are in combo build and externally + # PosixPath('external/score_docs_as_code+/src/helper_lib/test_helper_lib.py' + print("======== THIs IS PATH we PARSIGN FOr MODULE NAME") + print(path) + if str(path).startswith("external/"): + module_raw = str(path).removeprefix("external/") + filepath_split = str(module_raw).split("/", maxsplit=1) + module_name = str(filepath_split[0].removesuffix("+")) + return module_name + return "local_module" + + def main(): parser = argparse.ArgumentParser( description="Generate source code links JSON from source files" ) - parser.add_argument( + _ = parser.add_argument( "--output", required=True, type=Path, help="Output JSON file path", ) - parser.add_argument( + _ = parser.add_argument( "files", nargs="*", type=Path, @@ -53,15 +80,29 @@ def main(): args = parser.parse_args() all_need_references = [] + metadata: MetaData = { + "module_name": "", + "hash": "", + "url": "", + } + metadata_set = False for file_path in args.files: + if "known_good.json" not in str(file_path) and not metadata_set: + metadata["module_name"] = parse_module_name_from_path(file_path) + print("================") + print(metadata) + print("===============") + print("METADATA SET") + metadata_set = True abs_file_path = file_path.resolve() assert abs_file_path.exists(), abs_file_path references = _extract_references_from_file( abs_file_path.parent, Path(abs_file_path.name) ) all_need_references.extend(references) - - store_source_code_links_json(args.output, all_need_references) + store_source_code_links_with_metadata_json( + file=args.output, metadata=metadata, needlist=all_need_references + ) logger.info( f"Found {len(all_need_references)} need references in {len(args.files)} files" ) diff --git a/scripts_bazel/merge_sourcelinks.py b/scripts_bazel/merge_sourcelinks.py index f194e19ca..034d45db5 100644 
--- a/scripts_bazel/merge_sourcelinks.py +++ b/scripts_bazel/merge_sourcelinks.py @@ -20,22 +20,71 @@ import logging import sys from pathlib import Path +from typing import Any + +# from src.extensions.score_source_code_linker.need_source_links import ( +# store_source_code_links_combined_json, +# ) logging.basicConfig(level=logging.INFO, format="%(message)s") logger = logging.getLogger(__name__) +# [ +# PosixPath('bazel-out/k8-fastbuild/bin/sourcelinks_json.json'), +# PosixPath('bazel-out/k8-fastbuild/bin/external/score_persistency+/sourcelinks_json.json'), +# PosixPath('bazel-out/k8-fastbuild/bin/external/score_orchestrator+/sourcelinks_json.json'), +# PosixPath('bazel-out/k8-fastbuild/bin/external/score_kyron+/sourcelinks_json.json'), +# PosixPath('bazel-out/k8-fastbuild/bin/external/score_baselibs+/sourcelinks_json.json'), +# PosixPath('bazel-out/k8-fastbuild/bin/external/score_baselibs_rust+/sourcelinks_json.json'), +# PosixPath('bazel-out/k8-fastbuild/bin/external/score_logging+/sourcelinks_json.json'), +# PosixPath('bazel-out/k8-fastbuild/bin/external/score_platform+/sourcelinks_json.json'), +# PosixPath('bazel-out/k8-fastbuild/bin/external/score_process+/sourcelinks_json.json'), +# PosixPath('bazel-out/k8-fastbuild/bin/external/score_docs_as_code+/sourcelinks_json.json') +# ] + + +""" +if bazel-out/k8-fastbuild/bin/external/ in file_path => module is external +otherwise it's local +if local => module_name & hash == empty +if external => parse thing for module_name => look up known_good json for hash & url +""" + + +def parse_info_from_known_good( + known_good_json: Path, module_name: str +) -> tuple[str, str]: + print("===THIS IS MODULE NAME WE LOOK FOR===========") + print(module_name) + with open(known_good_json, "r") as f: + kg_json = json.load(f) + for category in kg_json["modules"].values(): + print("===THIS IS CATEGORY=========") + print(category) + if module_name in category: + print("===THIS IS MODULE NAME INSIDe CATEGORY===========") + 
print(module_name) + m = category[module_name] + return (m["hash"], m["repo"].removesuffix(".git")) + raise KeyError(f"Module {module_name!r} not found in known_good_json.") + def main(): parser = argparse.ArgumentParser( description="Merge multiple sourcelinks JSON files into one" ) - parser.add_argument( + _ = parser.add_argument( "--output", required=True, type=Path, help="Output merged JSON file path", ) - parser.add_argument( + _ = parser.add_argument( + "--known_good", + default=None, + help="Optional path to a 'known good' JSON file (provided by Bazel).", + ) + _ = parser.add_argument( "files", nargs="*", type=Path, @@ -43,13 +92,26 @@ def main(): ) args = parser.parse_args() + all_files = [x for x in args.files if "known_good.json" not in str(x)] merged = [] - for json_file in args.files: + for json_file in all_files: with open(json_file) as f: data = json.load(f) + metadata = data[0] + if metadata["module_name"] and metadata["module_name"] != "local_module": + hash, repo = parse_info_from_known_good( + known_good_json=args.known_good, module_name=metadata["module_name"] + ) + metadata["hash"] = hash + metadata["url"] = repo + # In the case that 'metadata[module_name]' is empty + # hash & url are already existing and empty inside of 'metadata' + # Therefore all 3 keys will be written to needlinks in each branch + for d in data[1:]: + d.update(metadata) assert isinstance(data, list), repr(data) - merged.extend(data) + merged.extend(data[1:]) with open(args.output, "w") as f: json.dump(merged, f, indent=2, ensure_ascii=False) diff --git a/src/extensions/score_source_code_linker/__init__.py b/src/extensions/score_source_code_linker/__init__.py index cf9843dc5..7e0ff0252 100644 --- a/src/extensions/score_source_code_linker/__init__.py +++ b/src/extensions/score_source_code_linker/__init__.py @@ -21,7 +21,6 @@ # This whole directory implements the above mentioned tool requirements import os -from collections import defaultdict from copy import deepcopy from 
pathlib import Path from typing import cast @@ -40,10 +39,19 @@ SourceCodeLinks, load_source_code_links_combined_json, store_source_code_links_combined_json, + group_by_need, ) + +from src.extensions.score_source_code_linker.module_source_links import ( + group_needs_by_module, + store_module_source_links_json, + load_module_source_links_json, +) + from src.extensions.score_source_code_linker.needlinks import ( NeedLink, load_source_code_links_json, + load_source_code_links_with_metadata_json, ) from src.extensions.score_source_code_linker.testlink import ( DataForTestLink, @@ -71,53 +79,6 @@ # ╰──────────────────────────────────────╯ -def group_by_need( - source_code_links: list[NeedLink], - test_case_links: list[DataForTestLink] | None = None, -) -> list[SourceCodeLinks]: - """ - Groups the given need links and test case links by their need ID. - Returns a nested dictionary structure with 'CodeLink' and 'TestLink' categories. - Example output: - - - { - "need": "", - "links": { - "CodeLinks": [NeedLink, NeedLink, ...], - "TestLinks": [testlink, testlink, ...] - } - } - """ - # TODO: I wonder if there is a more efficent way to do this - grouped_by_need: dict[str, NeedSourceLinks] = defaultdict( - lambda: NeedSourceLinks(TestLinks=[], CodeLinks=[]) - ) - - # Group source code links - for needlink in source_code_links: - grouped_by_need[needlink.need].CodeLinks.append(needlink) - - # Group test case links - if test_case_links is not None: - for testlink in test_case_links: - grouped_by_need[testlink.need].TestLinks.append(testlink) - - # Build final list of SourceCodeLinks - result: list[SourceCodeLinks] = [ - SourceCodeLinks( - need=need, - links=NeedSourceLinks( - CodeLinks=need_links.CodeLinks, - TestLinks=need_links.TestLinks, - ), - ) - for need, need_links in grouped_by_need.items() - ] - - return result - - def get_cache_filename(build_dir: Path, filename: str) -> Path: """ Returns the path to the cache file for the source code linker. 
@@ -142,14 +103,19 @@ def build_and_save_combined_file(outdir: Path): else: source_code_links_json = Path(source_code_links_json) - source_code_links = load_source_code_links_json(source_code_links_json) + # This isn't pretty will think of a better solution later, for now this should work + try: + source_code_links = load_source_code_links_json(source_code_links_json) + except AssertionError: + source_code_links = load_source_code_links_with_metadata_json( + source_code_links_json + ) test_code_links = load_test_xml_parsed_json( get_cache_filename(outdir, "score_xml_parser_cache.json") ) - + scl_list = group_by_need(source_code_links, test_code_links) store_source_code_links_combined_json( - outdir / "score_scl_grouped_cache.json", - group_by_need(source_code_links, test_code_links), + outdir / "score_scl_grouped_cache.json", scl_list ) @@ -254,10 +220,10 @@ def setup_test_code_linker(app: Sphinx, env: BuildEnvironment): def register_combined_linker(app: Sphinx): - # Registering the combined linker to Sphinx + # Registering the final combine linker to Sphinx # priority is set to make sure it is called in the right order. - # Needs to be called after xml parsing & codelink - app.connect("env-updated", setup_combined_linker, priority=507) + # Needs to be called after xml parsing & codelink & combined_linker + app.connect("env-updated", setup_combined_linker, priority=510) def setup_combined_linker(app: Sphinx, _: BuildEnvironment): @@ -272,6 +238,37 @@ def setup_combined_linker(app: Sphinx, _: BuildEnvironment): build_and_save_combined_file(app.outdir) +def register_module_linker(app: Sphinx): + # Registering the combined linker to Sphinx + # priority is set to make sure it is called in the right order. 
+ # Needs to be called after xml parsing & codelink + app.connect("env-updated", setup_module_linker, priority=520) + + +def build_and_save_module_scl_file(outdir: Path): + scl_links = load_source_code_links_combined_json( + get_cache_filename(outdir, "score_scl_grouped_cache.json") + ) + mcl_links = group_needs_by_module(scl_links) + store_module_source_links_json( + outdir / "score_module_grouped_scl_cache.json", mcl_links + ) + + +def setup_module_linker(app: Sphinx, _: BuildEnvironment): + grouped_cache = get_cache_filename( + app.outdir, "score_module_grouped_scl_cache.json" + ) + gruped_cache_exists = grouped_cache.exists() + # TODO this cache should be done via Bazel + if not gruped_cache_exists or not app.config.skip_rescanning_via_source_code_linker: + LOGGER.debug( + "Did not find combined json 'score_module_grouped_scl_cache.json' " + "in _build. Generating new one" + ) + build_and_save_module_scl_file(app.outdir) + + def setup_once(app: Sphinx): # might be the only way to solve this? if "skip_rescanning_via_source_code_linker" in app.config: @@ -295,9 +292,10 @@ def setup_once(app: Sphinx): setup_source_code_linker(app, ws_root) register_test_code_linker(app) register_combined_linker(app) + register_module_linker(app) - # Priorty=510 to ensure it's called after the test code linker & combined connection - app.connect("env-updated", inject_links_into_needs, priority=510) + # Priorty=515 to ensure it's called after the test code linker & combined connection + app.connect("env-updated", inject_links_into_needs, priority=525) def setup(app: Sphinx) -> dict[str, str | bool]: @@ -352,42 +350,46 @@ def inject_links_into_needs(app: Sphinx, env: BuildEnvironment) -> None: f"?? 
Need {id} already has testlink: {need.get('testlink')}" ) - source_code_links_by_need = load_source_code_links_combined_json( - get_cache_filename(app.outdir, "score_scl_grouped_cache.json") + scl_by_module = load_module_source_links_json( + get_cache_filename(app.outdir, "score_module_grouped_scl_cache.json") ) + # source_code_links_by_need = load_source_code_links_combined_json( + # get_cache_filename(app.outdir, "score_scl_grouped_cache.json") + # ) + for module_grouped_needs in scl_by_module: + for source_code_links in module_grouped_needs.needs: + need = find_need(needs_copy, source_code_links.need) + if need is None: + # TODO: print github annotations as in https://github.com/eclipse-score/bazel_registry/blob/7423b9996a45dd0a9ec868e06a970330ee71cf4f/tools/verify_semver_compatibility_level.py#L126-L129 + for n in source_code_links.links.CodeLinks: + LOGGER.warning( + f"{n.file}:{n.line}: Could not find {source_code_links.need} " + "in documentation [CODE LINK]", + type="score_source_code_linker", + ) + for n in source_code_links.links.TestLinks: + LOGGER.warning( + f"{n.file}:{n.line}: Could not find {source_code_links.need} " + "in documentation [TEST LINK]", + type="score_source_code_linker", + ) + continue + + need_as_dict = cast(dict[str, object], need) + metadata = module_grouped_needs.module_name + need_as_dict["source_code_link"] = ", ".join( + f"{get_github_link(metadata, n)}<>{n.file}:{n.line}" + for n in source_code_links.links.CodeLinks + ) + need_as_dict["testlink"] = ", ".join( + f"{get_github_link(metadata, n)}<>{n.name}" + for n in source_code_links.links.TestLinks + ) - for source_code_links in source_code_links_by_need: - need = find_need(needs_copy, source_code_links.need) - if need is None: - # TODO: print github annotations as in https://github.com/eclipse-score/bazel_registry/blob/7423b9996a45dd0a9ec868e06a970330ee71cf4f/tools/verify_semver_compatibility_level.py#L126-L129 - for n in source_code_links.links.CodeLinks: - LOGGER.warning( - 
f"{n.file}:{n.line}: Could not find {source_code_links.need} " - "in documentation [CODE LINK]", - type="score_source_code_linker", - ) - for n in source_code_links.links.TestLinks: - LOGGER.warning( - f"{n.file}:{n.line}: Could not find {source_code_links.need} " - "in documentation [TEST LINK]", - type="score_source_code_linker", - ) - continue - - need_as_dict = cast(dict[str, object], need) - - need_as_dict["source_code_link"] = ", ".join( - f"{get_github_link(n)}<>{n.file}:{n.line}" - for n in source_code_links.links.CodeLinks - ) - need_as_dict["testlink"] = ", ".join( - f"{get_github_link(n)}<>{n.name}" for n in source_code_links.links.TestLinks - ) - - # NOTE: Removing & adding the need is important to make sure - # the needs gets 're-evaluated'. - Needs_Data.remove_need(need["id"]) - Needs_Data.add_need(need) + # NOTE: Removing & adding the need is important to make sure + # the needs gets 're-evaluated'. + Needs_Data.remove_need(need["id"]) + Needs_Data.add_need(need) # ╭──────────────────────────────────────╮ diff --git a/src/extensions/score_source_code_linker/metadata.py b/src/extensions/score_source_code_linker/metadata.py new file mode 100644 index 000000000..03acc537a --- /dev/null +++ b/src/extensions/score_source_code_linker/metadata.py @@ -0,0 +1,23 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +from typing import TypedDict, TypeGuard + +class MetaData(TypedDict): + module_name: str + hash: str + url: str + +def is_metadata(x: object) -> TypeGuard[MetaData]: + # Make this as strict/loose as you want; at minimum, it must be a dict. + return isinstance(x, dict) and {"module_name", "hash", "url"} <= x.keys() diff --git a/src/extensions/score_source_code_linker/module_source_links.py b/src/extensions/score_source_code_linker/module_source_links.py new file mode 100644 index 000000000..3957733f0 --- /dev/null +++ b/src/extensions/score_source_code_linker/module_source_links.py @@ -0,0 +1,137 @@ +# ******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + + +from dataclasses import dataclass, asdict, field +import json +from typing import Any +from pathlib import Path + + +from src.extensions.score_source_code_linker.need_source_links import ( + NeedSourceLinks, + SourceCodeLinks, + SourceCodeLinks_JSON_Decoder, +) +from src.extensions.score_source_code_linker.needlinks import NeedLink +from src.extensions.score_source_code_linker.testlink import DataForTestLink + + +@dataclass +class moduleInfo: + name: str + hash: str + url: str + + +@dataclass +class ModuleSourceLinks: + module_name: moduleInfo + needs: list[SourceCodeLinks] = field(default_factory=list) + + +class ModuleSourceLinks_JSON_Encoder(json.JSONEncoder): + def default(self, o: object): + if isinstance(o, Path): + return str(o) + # We do not want to save the metadata inside the codelink + # (hash, module_name, url) + if isinstance(o, NeedLink): + return o.to_dict_without_metadata() + if isinstance( + o, ModuleSourceLinks | SourceCodeLinks | DataForTestLink | NeedSourceLinks + ): + return asdict(o) + return super().default(o) + + +def ModuleSourceLinks_JSON_Decoder( + d: dict[str, Any], +) -> ModuleSourceLinks | dict[str, Any]: + if "module_name" in d and "needs" in d: + module_name = d["module_name"] + needs = d["needs"] + return ModuleSourceLinks( + module_name=moduleInfo( + name=module_name.get("module_name"), + hash=module_name.get("hash"), + url=module_name.get("url"), + ), + # We know this can only be list[SourceCodeLinks] and nothing else + # Therefore => we ignore the type error here + needs=[SourceCodeLinks_JSON_Decoder(need) for need in needs] # type: ignore + ) + return d + + +def store_module_source_links_json( + file: Path, 
source_code_links: list[ModuleSourceLinks] +): + # After `rm -rf _build` or on clean builds the directory does not exist, so we need + # to create it + file.parent.mkdir(exist_ok=True) + with open(file, "w") as f: + json.dump( + source_code_links, + f, + cls=ModuleSourceLinks_JSON_Encoder, + indent=2, + ensure_ascii=False, + ) + + +def load_module_source_links_json(file: Path) -> list[ModuleSourceLinks]: + links: list[ModuleSourceLinks] = json.loads( + file.read_text(encoding="utf-8"), + object_hook=ModuleSourceLinks_JSON_Decoder, + ) + assert isinstance(links, list), ( + "The combined source code linker links should be " + "a list of SourceCodeLinks objects." + ) + assert all(isinstance(link, ModuleSourceLinks) for link in links), ( + "All items in combined_source_code_linker_cache should be " + "SourceCodeLinks objects." + ) + return links + + +def group_needs_by_module(links: list[SourceCodeLinks]) -> list[ModuleSourceLinks]: + module_groups: dict[str, ModuleSourceLinks] = {} + + for source_link in links: + if not source_link.links.CodeLinks: + continue + + first_link = source_link.links.CodeLinks[0] + module_key = first_link.module_name + + if module_key not in module_groups: + module_groups[module_key] = ModuleSourceLinks( + module_name=moduleInfo(name=module_key, hash=first_link.hash, url=first_link.url) + ) + + module_groups[module_key].needs.append(source_link) # Much clearer! 
+ + return [ + ModuleSourceLinks(module_name=group.module_name, needs=group.needs) + for group in module_groups.values() + ] + + +# # Pouplate Metadata +# # Since all metadata inside the Codelinks is the same +# # we can just arbitrarily grab the first one +# module_name=need_links.CodeLinks[0].module_name, +# hash=need_links.CodeLinks[0].hash, +# url=need_links.CodeLinks[0].url, diff --git a/src/extensions/score_source_code_linker/need_source_links.py b/src/extensions/score_source_code_linker/need_source_links.py index 6c738da8e..2deb4c5e8 100644 --- a/src/extensions/score_source_code_linker/need_source_links.py +++ b/src/extensions/score_source_code_linker/need_source_links.py @@ -21,6 +21,7 @@ import json from dataclasses import asdict, dataclass, field +from collections import defaultdict from pathlib import Path from typing import Any @@ -108,3 +109,53 @@ def load_source_code_links_combined_json(file: Path) -> list[SourceCodeLinks]: "SourceCodeLinks objects." ) return links + + +def group_by_need( + source_code_links: list[NeedLink], + test_case_links: list[DataForTestLink] | None = None, +) -> list[SourceCodeLinks]: + """ + Groups the given need links and test case links by their need ID. + Returns a nested dictionary structure with 'CodeLink' and 'TestLink' categories. + Example output: + + + { + "need": "", + "module_name": , + "hash": , + "url": , + "links": { + "CodeLinks": [NeedLink, NeedLink, ...], + "TestLinks": [testlink, testlink, ...] 
+ } + } + """ + # TODO: I wonder if there is a more efficent way to do this + grouped_by_need: dict[str, NeedSourceLinks] = defaultdict( + lambda: NeedSourceLinks(TestLinks=[], CodeLinks=[]) + ) + + # Group source code links + for needlink in source_code_links: + grouped_by_need[needlink.need].CodeLinks.append(needlink) + + # Group test case links + if test_case_links is not None: + for testlink in test_case_links: + grouped_by_need[testlink.need].TestLinks.append(testlink) + + # Build final list of SourceCodeLinks + result: list[SourceCodeLinks] = [ + SourceCodeLinks( + need=need, + links=NeedSourceLinks( + CodeLinks=need_links.CodeLinks, + TestLinks=need_links.TestLinks, + ), + ) + for need, need_links in grouped_by_need.items() + ] + + return result diff --git a/src/extensions/score_source_code_linker/needlinks.py b/src/extensions/score_source_code_linker/needlinks.py index 348147292..a727025bd 100644 --- a/src/extensions/score_source_code_linker/needlinks.py +++ b/src/extensions/score_source_code_linker/needlinks.py @@ -16,10 +16,11 @@ import os from dataclasses import asdict, dataclass from pathlib import Path -from typing import Any +from typing import Any, TypeGuard +from src.extensions.score_source_code_linker.metadata import MetaData, is_metadata -@dataclass(frozen=True, order=True) +@dataclass(order=True) class NeedLink: """Represents a single template string finding in a file.""" @@ -28,6 +29,19 @@ class NeedLink: tag: str need: str full_line: str + module_name: str = "" + hash: str = "" + url: str = "" + + def to_dict_full(self) -> dict[str, str | Path]: + return asdict(self) + + def to_dict_without_metadata(self) -> dict[str, str | Path]: + d = asdict(self) + d.pop("module_name", None) + d.pop("hash", None) + d.pop("url", None) + return d def DefaultNeedLink() -> NeedLink: @@ -41,6 +55,8 @@ def DefaultNeedLink() -> NeedLink: tag="", need="", full_line="", + # Module_name, hash, url are defaulted to "" + # therefore not needed to be listed ) @@ -61,26 
+77,99 @@ def needlink_decoder(d: dict[str, Any]) -> NeedLink | dict[str, Any]: tag=d["tag"], need=d["need"], full_line=d["full_line"], + module_name=d.get("module_name", ""), + hash=d.get("hash", ""), + url=d.get("url", ""), ) # It's something else, pass it on to other decoders return d -def store_source_code_links_json(file: Path, needlist: list[NeedLink]): - # After `rm -rf _build` or on clean builds the directory does not exist, - # so we need to create it +def store_source_code_links_with_metadata_json( + file: Path, metadata: MetaData, needlist: list[NeedLink] +) -> None: + """ + Writes a JSON array: + [ meta_dict, needlink1, needlink2, ... ] + + meta_dict must include: + module_name, hash, url + """ + payload: list[object] = [metadata, *needlist] + file.parent.mkdir(exist_ok=True) - with open(file, "w") as f: - json.dump( - needlist, - f, - cls=NeedLinkEncoder, # use your custom encoder - indent=2, - ensure_ascii=False, + with open(file, "w", encoding="utf-8") as f: + json.dump(payload, f, cls=NeedLinkEncoder, indent=2, ensure_ascii=False) + + +def store_source_code_links_json(file: Path, needlist: list[NeedLink]) -> None: + """ + Writes a JSON array: + [ meta_dict, needlink1, needlink2, ... ] + + meta_dict must include: + module_name, hash, url + """ + + file.parent.mkdir(exist_ok=True) + with open(file, "w", encoding="utf-8") as f: + json.dump(needlist, f, cls=NeedLinkEncoder, indent=2, ensure_ascii=False) + + +def _is_needlink_list(xs: list[object]) -> TypeGuard[list[NeedLink]]: + return all(isinstance(link, NeedLink) for link in xs) + + +def load_source_code_links_with_metadata_json(file: Path) -> list[NeedLink]: + """ + Expects the JSON array where first is a meta_dict: + [ meta_dict, needlink1, needlink2, ... ] + Returns: + [NeedLink, NeedLink, ...] 
+ + This normally should be the one called 'locally' => :docs target + """ + if not file.is_absolute(): + ws_root = os.environ.get("BUILD_WORKSPACE_DIRECTORY") + if ws_root: + file = Path(ws_root) / file + + data: list[object] = json.loads( + file.read_text(encoding="utf-8"), + object_hook=needlink_decoder, + ) + links: list[object] = [] + if not is_metadata(data[0]): + raise TypeError( + "If you do not have a 'metadata' dict as the first one in the json " + "you might wanted to call the load without metadata named: " + "'load_source_code_links_json'" ) + metadata: MetaData = data[0] + links = data[1:] + if not _is_needlink_list(links): + raise TypeError( + "In local build context all items after" + f"metadata must decode to NeedLink objects. File: {file}" + ) + for d in links: + d.module_name = metadata["module_name"] + d.hash = metadata["hash"] + d.url = metadata["url"] + return links def load_source_code_links_json(file: Path) -> list[NeedLink]: + """ + Expects the JSON array with needlinks + *that already have extra info in them* (module_name, hash, url): + [ needlink1, needlink2, ... ] + Returns: + [NeedLink, NeedLink, ...] + + This normally should be the one called in combo builds + => :docs_combo_experimental target + """ if not file.is_absolute(): # use env variable set by Bazel ws_root = os.environ.get("BUILD_WORKSPACE_DIRECTORY") @@ -98,3 +187,4 @@ def load_source_code_links_json(file: Path) -> list[NeedLink]: "All items in source_code_links should be NeedLink objects." 
) return links + diff --git a/src/helper_lib/additional_functions.py b/src/helper_lib/additional_functions.py index 5b1ce6d98..872d66af2 100644 --- a/src/helper_lib/additional_functions.py +++ b/src/helper_lib/additional_functions.py @@ -14,6 +14,7 @@ # Import types that depend on score_source_code_linker from src.extensions.score_source_code_linker.needlinks import DefaultNeedLink, NeedLink +from src.extensions.score_source_code_linker.module_source_links import moduleInfo from src.extensions.score_source_code_linker.testlink import ( DataForTestLink, DataOfTestCase, @@ -26,6 +27,19 @@ def get_github_link( + metadata: moduleInfo, + link: NeedLink | DataForTestLink | DataOfTestCase | None = None, +) -> str: + if link is None: + link = DefaultNeedLink() + if not metadata.hash: + # Local path (//:docs) + return get_github_link_from_git(link) + # Ref-Integration path (//:docs_combo..) + return get_github_link_from_json(metadata, link) + + +def get_github_link_from_git( link: NeedLink | DataForTestLink | DataOfTestCase | None = None, ) -> str: if link is None: @@ -36,3 +50,14 @@ def get_github_link( base_url = get_github_base_url() current_hash = get_current_git_hash(passed_git_root) return f"{base_url}/blob/{current_hash}/{link.file}#L{link.line}" + + +def get_github_link_from_json( + metadata: moduleInfo, + link: NeedLink | DataForTestLink | DataOfTestCase | None = None, +): + if link is None: + link = DefaultNeedLink() + base_url = metadata.url + current_hash = metadata.hash + return f"{base_url}/blob/{current_hash}/{link.file}#L{link.line}" From ebdb40faa3b0a400e46bb76952fd3ec766c0bad8 Mon Sep 17 00:00:00 2001 From: MaximilianSoerenPollak Date: Wed, 11 Mar 2026 21:55:42 +0100 Subject: [PATCH 02/11] WIP: seems to work in ref & local --- scripts_bazel/generate_sourcelinks_cli.py | 13 ++++++++----- .../generate_source_code_links_json.py | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/scripts_bazel/generate_sourcelinks_cli.py 
b/scripts_bazel/generate_sourcelinks_cli.py index 27a0e4508..2f26784ff 100644 --- a/scripts_bazel/generate_sourcelinks_cli.py +++ b/scripts_bazel/generate_sourcelinks_cli.py @@ -89,15 +89,18 @@ def main(): for file_path in args.files: if "known_good.json" not in str(file_path) and not metadata_set: metadata["module_name"] = parse_module_name_from_path(file_path) - print("================") - print(metadata) - print("===============") - print("METADATA SET") + # print("================") + # print(metadata) + # print("===============") + # print("METADATA SET") metadata_set = True abs_file_path = file_path.resolve() assert abs_file_path.exists(), abs_file_path + # print("THIS Is ABS FILEPATH: ", file_path) + # print("THIS IS ABS FILEPATH NAME: ", abs_file_path.name) + # print("THIS Is ABS FILEPATH PARENT: ", abs_file_path.parent) references = _extract_references_from_file( - abs_file_path.parent, Path(abs_file_path.name) + abs_file_path.parent, Path(abs_file_path.name), file_path ) all_need_references.extend(references) store_source_code_links_with_metadata_json( diff --git a/src/extensions/score_source_code_linker/generate_source_code_links_json.py b/src/extensions/score_source_code_linker/generate_source_code_links_json.py index abedc2db4..1c240e0ef 100644 --- a/src/extensions/score_source_code_linker/generate_source_code_links_json.py +++ b/src/extensions/score_source_code_linker/generate_source_code_links_json.py @@ -19,6 +19,8 @@ import os from pathlib import Path +from sphinx_needs.logging import get_logger +LOGGER = get_logger(__name__) from src.extensions.score_source_code_linker.needlinks import ( NeedLink, @@ -43,21 +45,21 @@ def _extract_references_from_line(line: str): yield tag, req.strip() -def _extract_references_from_file(root: Path, file_path: Path) -> list[NeedLink]: +def _extract_references_from_file(root: Path, file_path_name: Path, file_path: Path) -> list[NeedLink]: """Scan a single file for template strings and return findings.""" assert 
root.is_absolute(), "Root path must be absolute" - assert not file_path.is_absolute(), "File path must be relative to the root" + assert not file_path_name.is_absolute(), "File path must be relative to the root" # assert file_path.is_relative_to(root), ( # f"File path ({file_path}) must be relative to the root ({root})" # ) - assert (root / file_path).exists(), ( - f"File {file_path} does not exist in root {root}." + assert (root / file_path_name).exists(), ( + f"File {file_path_name} does not exist in root {root}." ) findings: list[NeedLink] = [] try: - with open(root / file_path, encoding="utf-8", errors="ignore") as f: + with open(root / file_path_name, encoding="utf-8", errors="ignore") as f: for line_num, line in enumerate(f, 1): for tag, req in _extract_references_from_line(line): findings.append( @@ -69,8 +71,9 @@ def _extract_references_from_file(root: Path, file_path: Path) -> list[NeedLink] full_line=line.strip(), ) ) - except (UnicodeDecodeError, PermissionError, OSError): + except (UnicodeDecodeError, PermissionError, OSError) as e: # Skip files that can't be read as text + LOGGER.debug(f"Error reading file to parse for linked needs: \n{e}") pass return findings @@ -121,8 +124,8 @@ def find_all_need_references(search_path: Path) -> list[NeedLink]: all_need_references.extend(references) elapsed_time = os.times().elapsed - start_time - print( - f"DEBUG: Found {len(all_need_references)} need references " + LOGGER.debug( + f"Found {len(all_need_references)} need references " f"in {elapsed_time:.2f} seconds" ) From 7f9f8bb12fdc8280b4b87da4bdba73b23b2f67f2 Mon Sep 17 00:00:00 2001 From: MaximilianSoerenPollak Date: Thu, 12 Mar 2026 13:23:22 +0100 Subject: [PATCH 03/11] Remove metadata file --- scripts_bazel/generate_sourcelinks_cli.py | 2 +- .../score_source_code_linker/metadata.py | 23 ------------------- .../score_source_code_linker/needlinks.py | 14 ++++++++--- 3 files changed, 12 insertions(+), 27 deletions(-) delete mode 100644 
src/extensions/score_source_code_linker/metadata.py diff --git a/scripts_bazel/generate_sourcelinks_cli.py b/scripts_bazel/generate_sourcelinks_cli.py index 2f26784ff..6d9336e23 100644 --- a/scripts_bazel/generate_sourcelinks_cli.py +++ b/scripts_bazel/generate_sourcelinks_cli.py @@ -27,8 +27,8 @@ ) from src.extensions.score_source_code_linker.needlinks import ( store_source_code_links_with_metadata_json, + MetaData, ) -from src.extensions.score_source_code_linker.metadata import MetaData logging.basicConfig(level=logging.INFO, format="%(message)s") logger = logging.getLogger(__name__) diff --git a/src/extensions/score_source_code_linker/metadata.py b/src/extensions/score_source_code_linker/metadata.py deleted file mode 100644 index 03acc537a..000000000 --- a/src/extensions/score_source_code_linker/metadata.py +++ /dev/null @@ -1,23 +0,0 @@ -# ******************************************************************************* -# Copyright (c) 2026 Contributors to the Eclipse Foundation -# -# See the NOTICE file(s) distributed with this work for additional -# information regarding copyright ownership. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0 -# -# SPDX-License-Identifier: Apache-2.0 -# ******************************************************************************* - -from typing import TypedDict, TypeGuard - -class MetaData(TypedDict): - module_name: str - hash: str - url: str - -def is_metadata(x: object) -> TypeGuard[MetaData]: - # Make this as strict/loose as you want; at minimum, it must be a dict. 
- return isinstance(x, dict) and {"module_name", "hash", "url"} <= x.keys() diff --git a/src/extensions/score_source_code_linker/needlinks.py b/src/extensions/score_source_code_linker/needlinks.py index a727025bd..abcdd0dbb 100644 --- a/src/extensions/score_source_code_linker/needlinks.py +++ b/src/extensions/score_source_code_linker/needlinks.py @@ -16,8 +16,17 @@ import os from dataclasses import asdict, dataclass from pathlib import Path -from typing import Any, TypeGuard -from src.extensions.score_source_code_linker.metadata import MetaData, is_metadata +from typing import Any, TypeGuard, TypedDict + + +class MetaData(TypedDict): + module_name: str + hash: str + url: str + +def is_metadata(x: object) -> TypeGuard[MetaData]: + # Make this as strict/loose as you want; at minimum, it must be a dict. + return isinstance(x, dict) and {"module_name", "hash", "url"} <= x.keys() @dataclass(order=True) @@ -187,4 +196,3 @@ def load_source_code_links_json(file: Path) -> list[NeedLink]: "All items in source_code_links should be NeedLink objects." 
) return links - From 403b9e97f5fe8730655ace40bd97ca196304cd0f Mon Sep 17 00:00:00 2001 From: MaximilianSoerenPollak Date: Thu, 12 Mar 2026 13:24:21 +0100 Subject: [PATCH 04/11] Formatting & Linting --- scripts_bazel/merge_sourcelinks.py | 7 +++---- .../score_source_code_linker/__init__.py | 16 +++++++--------- .../generate_source_code_links_json.py | 2 ++ .../module_source_links.py | 13 ++++++------- .../need_source_links.py | 2 +- src/helper_lib/additional_functions.py | 3 ++- 6 files changed, 21 insertions(+), 22 deletions(-) diff --git a/scripts_bazel/merge_sourcelinks.py b/scripts_bazel/merge_sourcelinks.py index 034d45db5..aa5ece761 100644 --- a/scripts_bazel/merge_sourcelinks.py +++ b/scripts_bazel/merge_sourcelinks.py @@ -20,7 +20,6 @@ import logging import sys from pathlib import Path -from typing import Any # from src.extensions.score_source_code_linker.need_source_links import ( # store_source_code_links_combined_json, @@ -44,8 +43,8 @@ """ -if bazel-out/k8-fastbuild/bin/external/ in file_path => module is external -otherwise it's local +if bazel-out/k8-fastbuild/bin/external/ in file_path => module is external +otherwise it's local if local => module_name & hash == empty if external => parse thing for module_name => look up known_good json for hash & url """ @@ -56,7 +55,7 @@ def parse_info_from_known_good( ) -> tuple[str, str]: print("===THIS IS MODULE NAME WE LOOK FOR===========") print(module_name) - with open(known_good_json, "r") as f: + with open(known_good_json) as f: kg_json = json.load(f) for category in kg_json["modules"].values(): print("===THIS IS CATEGORY=========") diff --git a/src/extensions/score_source_code_linker/__init__.py b/src/extensions/score_source_code_linker/__init__.py index 7e0ff0252..d82994dd0 100644 --- a/src/extensions/score_source_code_linker/__init__.py +++ b/src/extensions/score_source_code_linker/__init__.py @@ -34,20 +34,18 @@ from src.extensions.score_source_code_linker.generate_source_code_links_json import ( 
generate_source_code_links_json, ) +from src.extensions.score_source_code_linker.module_source_links import ( + group_needs_by_module, + load_module_source_links_json, + store_module_source_links_json, +) from src.extensions.score_source_code_linker.need_source_links import ( NeedSourceLinks, SourceCodeLinks, + group_by_need, load_source_code_links_combined_json, store_source_code_links_combined_json, - group_by_need, ) - -from src.extensions.score_source_code_linker.module_source_links import ( - group_needs_by_module, - store_module_source_links_json, - load_module_source_links_json, -) - from src.extensions.score_source_code_linker.needlinks import ( NeedLink, load_source_code_links_json, @@ -376,7 +374,7 @@ def inject_links_into_needs(app: Sphinx, env: BuildEnvironment) -> None: continue need_as_dict = cast(dict[str, object], need) - metadata = module_grouped_needs.module_name + metadata = module_grouped_needs.module need_as_dict["source_code_link"] = ", ".join( f"{get_github_link(metadata, n)}<>{n.file}:{n.line}" for n in source_code_links.links.CodeLinks diff --git a/src/extensions/score_source_code_linker/generate_source_code_links_json.py b/src/extensions/score_source_code_linker/generate_source_code_links_json.py index 1c240e0ef..ce6839b80 100644 --- a/src/extensions/score_source_code_linker/generate_source_code_links_json.py +++ b/src/extensions/score_source_code_linker/generate_source_code_links_json.py @@ -19,7 +19,9 @@ import os from pathlib import Path + from sphinx_needs.logging import get_logger + LOGGER = get_logger(__name__) from src.extensions.score_source_code_linker.needlinks import ( diff --git a/src/extensions/score_source_code_linker/module_source_links.py b/src/extensions/score_source_code_linker/module_source_links.py index 3957733f0..fe757b947 100644 --- a/src/extensions/score_source_code_linker/module_source_links.py +++ b/src/extensions/score_source_code_linker/module_source_links.py @@ -12,11 +12,10 @@ # 
******************************************************************************* -from dataclasses import dataclass, asdict, field import json -from typing import Any +from dataclasses import asdict, dataclass, field from pathlib import Path - +from typing import Any from src.extensions.score_source_code_linker.need_source_links import ( NeedSourceLinks, @@ -36,7 +35,7 @@ class moduleInfo: @dataclass class ModuleSourceLinks: - module_name: moduleInfo + module: moduleInfo needs: list[SourceCodeLinks] = field(default_factory=list) @@ -62,7 +61,7 @@ def ModuleSourceLinks_JSON_Decoder( module_name = d["module_name"] needs = d["needs"] return ModuleSourceLinks( - module_name=moduleInfo( + module=moduleInfo( name=module_name.get("module_name"), hash=module_name.get("hash"), url=module_name.get("url"), @@ -118,13 +117,13 @@ def group_needs_by_module(links: list[SourceCodeLinks]) -> list[ModuleSourceLink if module_key not in module_groups: module_groups[module_key] = ModuleSourceLinks( - module_name=moduleInfo(name=module_key, hash=first_link.hash, url=first_link.url) + module=moduleInfo(name=module_key, hash=first_link.hash, url=first_link.url) ) module_groups[module_key].needs.append(source_link) # Much clearer! 
return [ - ModuleSourceLinks(module_name=group.module_name, needs=group.needs) + ModuleSourceLinks(module=group.module, needs=group.needs) for group in module_groups.values() ] diff --git a/src/extensions/score_source_code_linker/need_source_links.py b/src/extensions/score_source_code_linker/need_source_links.py index 2deb4c5e8..1823ba4b4 100644 --- a/src/extensions/score_source_code_linker/need_source_links.py +++ b/src/extensions/score_source_code_linker/need_source_links.py @@ -20,8 +20,8 @@ # req-Id: tool_req__docs_dd_link_source_code_link import json -from dataclasses import asdict, dataclass, field from collections import defaultdict +from dataclasses import asdict, dataclass, field from pathlib import Path from typing import Any diff --git a/src/helper_lib/additional_functions.py b/src/helper_lib/additional_functions.py index 872d66af2..049a14548 100644 --- a/src/helper_lib/additional_functions.py +++ b/src/helper_lib/additional_functions.py @@ -12,9 +12,10 @@ # ******************************************************************************* from pathlib import Path +from src.extensions.score_source_code_linker.module_source_links import moduleInfo + # Import types that depend on score_source_code_linker from src.extensions.score_source_code_linker.needlinks import DefaultNeedLink, NeedLink -from src.extensions.score_source_code_linker.module_source_links import moduleInfo from src.extensions.score_source_code_linker.testlink import ( DataForTestLink, DataOfTestCase, From bb73a0f97ea399d96d0f2f010cf750185cc657d4 Mon Sep 17 00:00:00 2001 From: MaximilianSoerenPollak Date: Thu, 12 Mar 2026 13:26:11 +0100 Subject: [PATCH 05/11] Known_good required in merge script --- scripts_bazel/merge_sourcelinks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts_bazel/merge_sourcelinks.py b/scripts_bazel/merge_sourcelinks.py index aa5ece761..43785ea91 100644 --- a/scripts_bazel/merge_sourcelinks.py +++ b/scripts_bazel/merge_sourcelinks.py @@ -80,7 
+80,7 @@ def main(): ) _ = parser.add_argument( "--known_good", - default=None, + required=True, help="Optional path to a 'known good' JSON file (provided by Bazel).", ) _ = parser.add_argument( From 703cc71dbe3a11f8e687c16a3722e72836fb0fac Mon Sep 17 00:00:00 2001 From: MaximilianSoerenPollak Date: Thu, 12 Mar 2026 13:30:37 +0100 Subject: [PATCH 06/11] Remove debug print statements --- scripts_bazel/generate_sourcelinks_cli.py | 17 +++-------------- scripts_bazel/merge_sourcelinks.py | 8 +------- 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/scripts_bazel/generate_sourcelinks_cli.py b/scripts_bazel/generate_sourcelinks_cli.py index 6d9336e23..8b1c4bfd5 100644 --- a/scripts_bazel/generate_sourcelinks_cli.py +++ b/scripts_bazel/generate_sourcelinks_cli.py @@ -44,15 +44,11 @@ def parse_module_name_from_path(path: Path) -> str: # COMBO BUILD # If external is in the filepath that gets parsed => # file is in an external module => combo build - # e.g. .../external/score_docs_as_code+/src/helper_lib/__init__.py + # Example Path: + # PosixPath('external/score_docs_as_code+/src/helper_lib/test_helper_lib.py' - # PATH if we are in local repository - # PosixPath('src/helper_lib/test_helper_lib.py') - # Path if we are in combo build and externally - # PosixPath('external/score_docs_as_code+/src/helper_lib/test_helper_lib.py' - print("======== THIs IS PATH we PARSIGN FOr MODULE NAME") - print(path) if str(path).startswith("external/"): + # This allows for files / folders etc. to have `external` in their name too. 
module_raw = str(path).removeprefix("external/") filepath_split = str(module_raw).split("/", maxsplit=1) module_name = str(filepath_split[0].removesuffix("+")) @@ -89,16 +85,9 @@ def main(): for file_path in args.files: if "known_good.json" not in str(file_path) and not metadata_set: metadata["module_name"] = parse_module_name_from_path(file_path) - # print("================") - # print(metadata) - # print("===============") - # print("METADATA SET") metadata_set = True abs_file_path = file_path.resolve() assert abs_file_path.exists(), abs_file_path - # print("THIS Is ABS FILEPATH: ", file_path) - # print("THIS IS ABS FILEPATH NAME: ", abs_file_path.name) - # print("THIS Is ABS FILEPATH PARENT: ", abs_file_path.parent) references = _extract_references_from_file( abs_file_path.parent, Path(abs_file_path.name), file_path ) diff --git a/scripts_bazel/merge_sourcelinks.py b/scripts_bazel/merge_sourcelinks.py index 43785ea91..b956b0bda 100644 --- a/scripts_bazel/merge_sourcelinks.py +++ b/scripts_bazel/merge_sourcelinks.py @@ -53,16 +53,10 @@ def parse_info_from_known_good( known_good_json: Path, module_name: str ) -> tuple[str, str]: - print("===THIS IS MODULE NAME WE LOOK FOR===========") - print(module_name) with open(known_good_json) as f: kg_json = json.load(f) for category in kg_json["modules"].values(): - print("===THIS IS CATEGORY=========") - print(category) if module_name in category: - print("===THIS IS MODULE NAME INSIDe CATEGORY===========") - print(module_name) m = category[module_name] return (m["hash"], m["repo"].removesuffix(".git")) raise KeyError(f"Module {module_name!r} not found in known_good_json.") @@ -104,7 +98,7 @@ def main(): ) metadata["hash"] = hash metadata["url"] = repo - # In the case that 'metadata[module_name]' is empty + # In the case that 'metadata[module_name]' is 'local_module' or empty (somehow) # hash & url are already existing and empty inside of 'metadata' # Therefore all 3 keys will be written to needlinks in each branch for d 
in data[1:]: From 59f760283c45304bb84e10ca586868b713ad1458 Mon Sep 17 00:00:00 2001 From: MaximilianSoerenPollak Date: Thu, 12 Mar 2026 13:40:57 +0100 Subject: [PATCH 07/11] Copilot Findings --- .../score_source_code_linker/__init__.py | 4 ++-- .../module_source_links.py | 14 ++++++++------ .../score_source_code_linker/needlinks.py | 9 +++------ src/helper_lib/additional_functions.py | 6 +++--- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/extensions/score_source_code_linker/__init__.py b/src/extensions/score_source_code_linker/__init__.py index d82994dd0..6a409b917 100644 --- a/src/extensions/score_source_code_linker/__init__.py +++ b/src/extensions/score_source_code_linker/__init__.py @@ -226,9 +226,9 @@ def register_combined_linker(app: Sphinx): def setup_combined_linker(app: Sphinx, _: BuildEnvironment): grouped_cache = get_cache_filename(app.outdir, "score_scl_grouped_cache.json") - gruped_cache_exists = grouped_cache.exists() + grouped_cache_exists = grouped_cache.exists() # TODO this cache should be done via Bazel - if not gruped_cache_exists or not app.config.skip_rescanning_via_source_code_linker: + if not grouped_cache_exists or not app.config.skip_rescanning_via_source_code_linker: LOGGER.debug( "Did not find combined json 'score_scl_grouped_cache.json' in _build." 
"Generating new one" diff --git a/src/extensions/score_source_code_linker/module_source_links.py b/src/extensions/score_source_code_linker/module_source_links.py index fe757b947..9197b75ff 100644 --- a/src/extensions/score_source_code_linker/module_source_links.py +++ b/src/extensions/score_source_code_linker/module_source_links.py @@ -27,7 +27,7 @@ @dataclass -class moduleInfo: +class ModuleInfo: name: str hash: str url: str @@ -35,7 +35,7 @@ class moduleInfo: @dataclass class ModuleSourceLinks: - module: moduleInfo + module: ModuleInfo needs: list[SourceCodeLinks] = field(default_factory=list) @@ -48,7 +48,7 @@ def default(self, o: object): if isinstance(o, NeedLink): return o.to_dict_without_metadata() if isinstance( - o, ModuleSourceLinks | SourceCodeLinks | DataForTestLink | NeedSourceLinks + o, (ModuleSourceLinks, SourceCodeLinks, DataForTestLink, NeedSourceLinks) ): return asdict(o) return super().default(o) @@ -61,14 +61,14 @@ def ModuleSourceLinks_JSON_Decoder( module_name = d["module_name"] needs = d["needs"] return ModuleSourceLinks( - module=moduleInfo( + module=ModuleInfo( name=module_name.get("module_name"), hash=module_name.get("hash"), url=module_name.get("url"), ), # We know this can only be list[SourceCodeLinks] and nothing else # Therefore => we ignore the type error here - needs=[SourceCodeLinks_JSON_Decoder(need) for need in needs] # type: ignore + needs=[SourceCodeLinks_JSON_Decoder(need) for need in needs], # type: ignore ) return d @@ -117,7 +117,9 @@ def group_needs_by_module(links: list[SourceCodeLinks]) -> list[ModuleSourceLink if module_key not in module_groups: module_groups[module_key] = ModuleSourceLinks( - module=moduleInfo(name=module_key, hash=first_link.hash, url=first_link.url) + module=ModuleInfo( + name=module_key, hash=first_link.hash, url=first_link.url + ) ) module_groups[module_key].needs.append(source_link) # Much clearer! 
diff --git a/src/extensions/score_source_code_linker/needlinks.py b/src/extensions/score_source_code_linker/needlinks.py index abcdd0dbb..324e54114 100644 --- a/src/extensions/score_source_code_linker/needlinks.py +++ b/src/extensions/score_source_code_linker/needlinks.py @@ -42,10 +42,10 @@ class NeedLink: hash: str = "" url: str = "" - def to_dict_full(self) -> dict[str, str | Path]: + def to_dict_full(self) -> dict[str, str | Path | int]: return asdict(self) - def to_dict_without_metadata(self) -> dict[str, str | Path]: + def to_dict_without_metadata(self) -> dict[str, str | Path | int]: d = asdict(self) d.pop("module_name", None) d.pop("hash", None) @@ -114,10 +114,7 @@ def store_source_code_links_with_metadata_json( def store_source_code_links_json(file: Path, needlist: list[NeedLink]) -> None: """ Writes a JSON array: - [ meta_dict, needlink1, needlink2, ... ] - - meta_dict must include: - module_name, hash, url + [ needlink1, needlink2, ... ] """ file.parent.mkdir(exist_ok=True) diff --git a/src/helper_lib/additional_functions.py b/src/helper_lib/additional_functions.py index 049a14548..21b4d6fd2 100644 --- a/src/helper_lib/additional_functions.py +++ b/src/helper_lib/additional_functions.py @@ -12,7 +12,7 @@ # ******************************************************************************* from pathlib import Path -from src.extensions.score_source_code_linker.module_source_links import moduleInfo +from src.extensions.score_source_code_linker.module_source_links import ModuleInfo # Import types that depend on score_source_code_linker from src.extensions.score_source_code_linker.needlinks import DefaultNeedLink, NeedLink @@ -28,7 +28,7 @@ def get_github_link( - metadata: moduleInfo, + metadata: ModuleInfo, link: NeedLink | DataForTestLink | DataOfTestCase | None = None, ) -> str: if link is None: @@ -54,7 +54,7 @@ def get_github_link_from_git( def get_github_link_from_json( - metadata: moduleInfo, + metadata: ModuleInfo, link: NeedLink | DataForTestLink | 
DataOfTestCase | None = None, ): if link is None: From 715544154c9f9b69b3b0ad2848bdb69ac589e315 Mon Sep 17 00:00:00 2001 From: MaximilianSoerenPollak Date: Thu, 12 Mar 2026 14:06:22 +0100 Subject: [PATCH 08/11] Formatting & Linting --- scripts_bazel/generate_sourcelinks_cli.py | 5 ++--- scripts_bazel/merge_sourcelinks.py | 14 +------------- .../score_source_code_linker/__init__.py | 12 ++++-------- .../generate_source_code_links_json.py | 8 +++++--- .../module_source_links.py | 2 +- .../score_source_code_linker/needlinks.py | 2 +- 6 files changed, 14 insertions(+), 29 deletions(-) diff --git a/scripts_bazel/generate_sourcelinks_cli.py b/scripts_bazel/generate_sourcelinks_cli.py index 8b1c4bfd5..ba54b5d23 100644 --- a/scripts_bazel/generate_sourcelinks_cli.py +++ b/scripts_bazel/generate_sourcelinks_cli.py @@ -26,8 +26,8 @@ _extract_references_from_file, # pyright: ignore[reportPrivateUsage] TODO: move it out of the extension and into this script ) from src.extensions.score_source_code_linker.needlinks import ( - store_source_code_links_with_metadata_json, MetaData, + store_source_code_links_with_metadata_json, ) logging.basicConfig(level=logging.INFO, format="%(message)s") @@ -51,8 +51,7 @@ def parse_module_name_from_path(path: Path) -> str: # This allows for files / folders etc. to have `external` in their name too. 
module_raw = str(path).removeprefix("external/") filepath_split = str(module_raw).split("/", maxsplit=1) - module_name = str(filepath_split[0].removesuffix("+")) - return module_name + return str(filepath_split[0].removesuffix("+")) return "local_module" diff --git a/scripts_bazel/merge_sourcelinks.py b/scripts_bazel/merge_sourcelinks.py index b956b0bda..626b48c56 100644 --- a/scripts_bazel/merge_sourcelinks.py +++ b/scripts_bazel/merge_sourcelinks.py @@ -28,18 +28,6 @@ logging.basicConfig(level=logging.INFO, format="%(message)s") logger = logging.getLogger(__name__) -# [ -# PosixPath('bazel-out/k8-fastbuild/bin/sourcelinks_json.json'), -# PosixPath('bazel-out/k8-fastbuild/bin/external/score_persistency+/sourcelinks_json.json'), -# PosixPath('bazel-out/k8-fastbuild/bin/external/score_orchestrator+/sourcelinks_json.json'), -# PosixPath('bazel-out/k8-fastbuild/bin/external/score_kyron+/sourcelinks_json.json'), -# PosixPath('bazel-out/k8-fastbuild/bin/external/score_baselibs+/sourcelinks_json.json'), -# PosixPath('bazel-out/k8-fastbuild/bin/external/score_baselibs_rust+/sourcelinks_json.json'), -# PosixPath('bazel-out/k8-fastbuild/bin/external/score_logging+/sourcelinks_json.json'), -# PosixPath('bazel-out/k8-fastbuild/bin/external/score_platform+/sourcelinks_json.json'), -# PosixPath('bazel-out/k8-fastbuild/bin/external/score_process+/sourcelinks_json.json'), -# PosixPath('bazel-out/k8-fastbuild/bin/external/score_docs_as_code+/sourcelinks_json.json') -# ] """ @@ -98,7 +86,7 @@ def main(): ) metadata["hash"] = hash metadata["url"] = repo - # In the case that 'metadata[module_name]' is 'local_module' or empty (somehow) + # In the case that 'metadata[module_name]' is 'local_module' # hash & url are already existing and empty inside of 'metadata' # Therefore all 3 keys will be written to needlinks in each branch for d in data[1:]: diff --git a/src/extensions/score_source_code_linker/__init__.py b/src/extensions/score_source_code_linker/__init__.py index 
6a409b917..a84ddfb42 100644 --- a/src/extensions/score_source_code_linker/__init__.py +++ b/src/extensions/score_source_code_linker/__init__.py @@ -40,19 +40,15 @@ store_module_source_links_json, ) from src.extensions.score_source_code_linker.need_source_links import ( - NeedSourceLinks, - SourceCodeLinks, group_by_need, load_source_code_links_combined_json, store_source_code_links_combined_json, ) from src.extensions.score_source_code_linker.needlinks import ( - NeedLink, load_source_code_links_json, load_source_code_links_with_metadata_json, ) from src.extensions.score_source_code_linker.testlink import ( - DataForTestLink, load_data_of_test_case_json, load_test_xml_parsed_json, ) @@ -228,7 +224,10 @@ def setup_combined_linker(app: Sphinx, _: BuildEnvironment): grouped_cache = get_cache_filename(app.outdir, "score_scl_grouped_cache.json") grouped_cache_exists = grouped_cache.exists() # TODO this cache should be done via Bazel - if not grouped_cache_exists or not app.config.skip_rescanning_via_source_code_linker: + if ( + not grouped_cache_exists + or not app.config.skip_rescanning_via_source_code_linker + ): LOGGER.debug( "Did not find combined json 'score_scl_grouped_cache.json' in _build." 
"Generating new one" @@ -351,9 +350,6 @@ def inject_links_into_needs(app: Sphinx, env: BuildEnvironment) -> None: scl_by_module = load_module_source_links_json( get_cache_filename(app.outdir, "score_module_grouped_scl_cache.json") ) - # source_code_links_by_need = load_source_code_links_combined_json( - # get_cache_filename(app.outdir, "score_scl_grouped_cache.json") - # ) for module_grouped_needs in scl_by_module: for source_code_links in module_grouped_needs.needs: need = find_need(needs_copy, source_code_links.need) diff --git a/src/extensions/score_source_code_linker/generate_source_code_links_json.py b/src/extensions/score_source_code_linker/generate_source_code_links_json.py index ce6839b80..33ef6af37 100644 --- a/src/extensions/score_source_code_linker/generate_source_code_links_json.py +++ b/src/extensions/score_source_code_linker/generate_source_code_links_json.py @@ -22,13 +22,13 @@ from sphinx_needs.logging import get_logger -LOGGER = get_logger(__name__) - from src.extensions.score_source_code_linker.needlinks import ( NeedLink, store_source_code_links_json, ) +LOGGER = get_logger(__name__) + TAGS = [ "# " + "req-traceability:", "# " + "req-Id:", @@ -47,7 +47,9 @@ def _extract_references_from_line(line: str): yield tag, req.strip() -def _extract_references_from_file(root: Path, file_path_name: Path, file_path: Path) -> list[NeedLink]: +def _extract_references_from_file( + root: Path, file_path_name: Path, file_path: Path +) -> list[NeedLink]: """Scan a single file for template strings and return findings.""" assert root.is_absolute(), "Root path must be absolute" assert not file_path_name.is_absolute(), "File path must be relative to the root" diff --git a/src/extensions/score_source_code_linker/module_source_links.py b/src/extensions/score_source_code_linker/module_source_links.py index 9197b75ff..887d4b667 100644 --- a/src/extensions/score_source_code_linker/module_source_links.py +++ b/src/extensions/score_source_code_linker/module_source_links.py @@ 
-48,7 +48,7 @@ def default(self, o: object): if isinstance(o, NeedLink): return o.to_dict_without_metadata() if isinstance( - o, (ModuleSourceLinks, SourceCodeLinks, DataForTestLink, NeedSourceLinks) + o, ModuleSourceLinks | SourceCodeLinks | DataForTestLink | NeedSourceLinks ): return asdict(o) return super().default(o) diff --git a/src/extensions/score_source_code_linker/needlinks.py b/src/extensions/score_source_code_linker/needlinks.py index 324e54114..8bc8f9439 100644 --- a/src/extensions/score_source_code_linker/needlinks.py +++ b/src/extensions/score_source_code_linker/needlinks.py @@ -16,7 +16,7 @@ import os from dataclasses import asdict, dataclass from pathlib import Path -from typing import Any, TypeGuard, TypedDict +from typing import Any, TypedDict, TypeGuard class MetaData(TypedDict): From 2355aa3a4e12ad9984e94349216d35cc516d32ce Mon Sep 17 00:00:00 2001 From: MaximilianSoerenPollak Date: Thu, 12 Mar 2026 14:11:28 +0100 Subject: [PATCH 09/11] Basepyright linting --- .../score_source_code_linker/generate_source_code_links_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/extensions/score_source_code_linker/generate_source_code_links_json.py b/src/extensions/score_source_code_linker/generate_source_code_links_json.py index 33ef6af37..7b39b7228 100644 --- a/src/extensions/score_source_code_linker/generate_source_code_links_json.py +++ b/src/extensions/score_source_code_linker/generate_source_code_links_json.py @@ -124,7 +124,7 @@ def find_all_need_references(search_path: Path) -> list[NeedLink]: # Use os.walk to have better control over directory traversal for file in iterate_files_recursively(search_path): - references = _extract_references_from_file(search_path, file) + references = _extract_references_from_file(search_path,Path(file.name), file) all_need_references.extend(references) elapsed_time = os.times().elapsed - start_time From bb475e882c3ec915bf58d4b59b6e60d5405b7c3b Mon Sep 17 00:00:00 2001 From: 
MaximilianSoerenPollak Date: Fri, 13 Mar 2026 16:51:28 +0100 Subject: [PATCH 10/11] WIP: Testlinks endlink still wrong --- scripts_bazel/BUILD | 1 + scripts_bazel/generate_sourcelinks_cli.py | 23 +------- scripts_bazel/merge_sourcelinks.py | 22 +++----- src/extensions/score_source_code_linker/BUILD | 1 + .../score_source_code_linker/__init__.py | 2 +- .../score_source_code_linker/helpers.py} | 36 ++++++++++++ .../module_source_links.py | 42 ++++++++------ .../score_source_code_linker/testlink.py | 34 +++++++++++ .../tests/test_codelink.py | 2 +- .../test_source_code_link_integration.py | 3 +- .../score_source_code_linker/xml_parser.py | 56 ++++++++++++++++--- src/helper_lib/BUILD | 3 +- src/helper_lib/__init__.py | 2 +- 13 files changed, 160 insertions(+), 67 deletions(-) rename src/{helper_lib/additional_functions.py => extensions/score_source_code_linker/helpers.py} (60%) diff --git a/scripts_bazel/BUILD b/scripts_bazel/BUILD index 81c9212f1..befe51730 100644 --- a/scripts_bazel/BUILD +++ b/scripts_bazel/BUILD @@ -33,6 +33,7 @@ py_binary( py_binary( name = "merge_sourcelinks", srcs = ["merge_sourcelinks.py"], + deps= [ "//src/extensions/score_source_code_linker"], main = "merge_sourcelinks.py", visibility = ["//visibility:public"], ) diff --git a/scripts_bazel/generate_sourcelinks_cli.py b/scripts_bazel/generate_sourcelinks_cli.py index ba54b5d23..e1feb8e0b 100644 --- a/scripts_bazel/generate_sourcelinks_cli.py +++ b/scripts_bazel/generate_sourcelinks_cli.py @@ -25,36 +25,17 @@ from src.extensions.score_source_code_linker.generate_source_code_links_json import ( _extract_references_from_file, # pyright: ignore[reportPrivateUsage] TODO: move it out of the extension and into this script ) +from src.extensions.score_source_code_linker.helpers import parse_module_name_from_path from src.extensions.score_source_code_linker.needlinks import ( MetaData, store_source_code_links_with_metadata_json, ) + logging.basicConfig(level=logging.INFO, format="%(message)s") logger = 
logging.getLogger(__name__) -def parse_module_name_from_path(path: Path) -> str: - """ - Parse out the Module-Name from the filename gotten - /home/user/.cache/bazel/aksj37981712/external/score_docs_as_code+/src/tests/testfile.py - => score_docs_as_code - """ - - # COMBO BUILD - # If external is in the filepath that gets parsed => - # file is in an external module => combo build - # Example Path: - # PosixPath('external/score_docs_as_code+/src/helper_lib/test_helper_lib.py' - - if str(path).startswith("external/"): - # This allows for files / folders etc. to have `external` in their name too. - module_raw = str(path).removeprefix("external/") - filepath_split = str(module_raw).split("/", maxsplit=1) - return str(filepath_split[0].removesuffix("+")) - return "local_module" - - def main(): parser = argparse.ArgumentParser( description="Generate source code links JSON from source files" diff --git a/scripts_bazel/merge_sourcelinks.py b/scripts_bazel/merge_sourcelinks.py index 626b48c56..afb2cc198 100644 --- a/scripts_bazel/merge_sourcelinks.py +++ b/scripts_bazel/merge_sourcelinks.py @@ -21,9 +21,7 @@ import sys from pathlib import Path -# from src.extensions.score_source_code_linker.need_source_links import ( -# store_source_code_links_combined_json, -# ) +from src.extensions.score_source_code_linker.helpers import parse_info_from_known_good logging.basicConfig(level=logging.INFO, format="%(message)s") logger = logging.getLogger(__name__) @@ -38,17 +36,10 @@ """ -def parse_info_from_known_good( - known_good_json: Path, module_name: str -) -> tuple[str, str]: - with open(known_good_json) as f: - kg_json = json.load(f) - for category in kg_json["modules"].values(): - if module_name in category: - m = category[module_name] - return (m["hash"], m["repo"].removesuffix(".git")) - raise KeyError(f"Module {module_name!r} not found in known_good_json.") +def add_needid_to_metaneed_mapping(mapping: dict[str, dict[str, str]], metadata: dict[str, str], needid: str): + mapping + 
pass def main(): parser = argparse.ArgumentParser( @@ -76,6 +67,7 @@ def main(): all_files = [x for x in args.files if "known_good.json" not in str(x)] merged = [] + needs_metadata_mapping = {} for json_file in all_files: with open(json_file) as f: data = json.load(f) @@ -89,11 +81,11 @@ def main(): # In the case that 'metadata[module_name]' is 'local_module' # hash & url are already existing and empty inside of 'metadata' # Therefore all 3 keys will be written to needlinks in each branch + for d in data[1:]: d.update(metadata) assert isinstance(data, list), repr(data) - merged.extend(data[1:]) - + merged.extend(data[1:]) with open(args.output, "w") as f: json.dump(merged, f, indent=2, ensure_ascii=False) diff --git a/src/extensions/score_source_code_linker/BUILD b/src/extensions/score_source_code_linker/BUILD index e3c289c66..55d471374 100644 --- a/src/extensions/score_source_code_linker/BUILD +++ b/src/extensions/score_source_code_linker/BUILD @@ -54,6 +54,7 @@ py_library( "needlinks.py", "testlink.py", "xml_parser.py", + "helpers.py", ], imports = ["."], visibility = ["//visibility:public"], diff --git a/src/extensions/score_source_code_linker/__init__.py b/src/extensions/score_source_code_linker/__init__.py index a84ddfb42..c687e2e3e 100644 --- a/src/extensions/score_source_code_linker/__init__.py +++ b/src/extensions/score_source_code_linker/__init__.py @@ -60,7 +60,7 @@ find_git_root, find_ws_root, ) -from src.helper_lib.additional_functions import get_github_link +from src.extensions.score_source_code_linker.helpers import get_github_link LOGGER = get_logger(__name__) # Uncomment this to enable more verbose logging diff --git a/src/helper_lib/additional_functions.py b/src/extensions/score_source_code_linker/helpers.py similarity index 60% rename from src/helper_lib/additional_functions.py rename to src/extensions/score_source_code_linker/helpers.py index 21b4d6fd2..5544e2696 100644 --- a/src/helper_lib/additional_functions.py +++ 
b/src/extensions/score_source_code_linker/helpers.py @@ -10,6 +10,7 @@ # # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* +import json from pathlib import Path from src.extensions.score_source_code_linker.module_source_links import ModuleInfo @@ -62,3 +63,38 @@ def get_github_link_from_json( base_url = metadata.url current_hash = metadata.hash return f"{base_url}/blob/{current_hash}/{link.file}#L{link.line}" + + +def parse_module_name_from_path(path: Path) -> str: + """ + Parse out the Module-Name from the filename gotten + /home/user/.cache/bazel/aksj37981712/external/score_docs_as_code+/src/tests/testfile.py + => score_docs_as_code + """ + + # COMBO BUILD + # If external is in the filepath that gets parsed => + # file is in an external module => combo build + # Example Path: + # PosixPath('external/score_docs_as_code+/src/helper_lib/test_helper_lib.py' + + if str(path).startswith("external/"): + # This allows for files / folders etc. to have `external` in their name too. 
+ module_raw = str(path).removeprefix("external/") + filepath_split = str(module_raw).split("/", maxsplit=1) + return str(filepath_split[0].removesuffix("+")) + # We return this for a local build (`//:docs`); the rest of DaC knows + # what to do when it encounters this module_name + return "local_module" + + +def parse_info_from_known_good( + known_good_json: Path, module_name: str +) -> tuple[str, str]: + with open(known_good_json) as f: + kg_json = json.load(f) + for category in kg_json["modules"].values(): + if module_name in category: + m = category[module_name] + return (m["hash"], m["repo"].removesuffix(".git")) + raise KeyError(f"Module {module_name!r} not found in known_good_json.") diff --git a/src/extensions/score_source_code_linker/module_source_links.py b/src/extensions/score_source_code_linker/module_source_links.py index 887d4b667..97dc27a68 100644 --- a/src/extensions/score_source_code_linker/module_source_links.py +++ b/src/extensions/score_source_code_linker/module_source_links.py @@ -15,6 +15,7 @@ import json from dataclasses import asdict, dataclass, field from pathlib import Path +from re import M from typing import Any from src.extensions.score_source_code_linker.need_source_links import ( @@ -43,13 +44,12 @@ class ModuleSourceLinks_JSON_Encoder(json.JSONEncoder): def default(self, o: object): if isinstance(o, Path): return str(o) - # We do not want to save the metadata inside the codelink + # We do not want to save the metadata inside the codelink or testlink + # As we save this already in a structure above it # (hash, module_name, url) - if isinstance(o, NeedLink): + if isinstance(o, NeedLink | DataForTestLink): return o.to_dict_without_metadata() - if isinstance( - o, ModuleSourceLinks | SourceCodeLinks | DataForTestLink | NeedSourceLinks - ): + if isinstance(o, ModuleSourceLinks | SourceCodeLinks | NeedSourceLinks): return asdict(o) return super().default(o) @@ -57,14 +57,14 @@ def default(self, o: object): def
ModuleSourceLinks_JSON_Decoder( d: dict[str, Any], ) -> ModuleSourceLinks | dict[str, Any]: - if "module_name" in d and "needs" in d: - module_name = d["module_name"] + if "module" in d and "needs" in d: + module = d["module"] needs = d["needs"] return ModuleSourceLinks( module=ModuleInfo( - name=module_name.get("module_name"), - hash=module_name.get("hash"), - url=module_name.get("url"), + name=module.get("module_name"), + hash=module.get("hash"), + url=module.get("url"), ), # We know this can only be list[SourceCodeLinks] and nothing else # Therefore => we ignore the type error here @@ -95,12 +95,16 @@ def load_module_source_links_json(file: Path) -> list[ModuleSourceLinks]: object_hook=ModuleSourceLinks_JSON_Decoder, ) assert isinstance(links, list), ( - "The combined source code linker links should be " - "a list of SourceCodeLinks objects." + "The ModuleSourceLink json should be aa list of ModuleSourceLink objects." ) + print("=====================") + print("=== TESTING LINKS IN ModuleSourceLink === ") + for link in links: + if not isinstance(link, ModuleSourceLinks): + print(f"Link not module_sourcelink: {link}") + print("=====================") assert all(isinstance(link, ModuleSourceLinks) for link in links), ( - "All items in combined_source_code_linker_cache should be " - "SourceCodeLinks objects." + "All items in module source link cache should be ModuleSourceLink objects." ) return links @@ -109,10 +113,14 @@ def group_needs_by_module(links: list[SourceCodeLinks]) -> list[ModuleSourceLink module_groups: dict[str, ModuleSourceLinks] = {} for source_link in links: - if not source_link.links.CodeLinks: + # Check if we can take moduleInfo from code or testlinks + if source_link.links.CodeLinks: + first_link = source_link.links.CodeLinks[0] + elif source_link.links.TestLinks: + first_link = source_link.links.TestLinks[0] + else: + # This should not happen? 
continue - - first_link = source_link.links.CodeLinks[0] module_key = first_link.module_name if module_key not in module_groups: diff --git a/src/extensions/score_source_code_linker/testlink.py b/src/extensions/score_source_code_linker/testlink.py index ee83c7f95..f001cc203 100644 --- a/src/extensions/score_source_code_linker/testlink.py +++ b/src/extensions/score_source_code_linker/testlink.py @@ -30,6 +30,7 @@ from sphinx_needs import logging + LOGGER = logging.get_logger(__name__) @@ -42,6 +43,19 @@ class DataForTestLink: verify_type: str result: str result_text: str = "" + module_name: str = "" + hash: str = "" + url: str = "" + + def to_dict_full(self) -> dict[str, str | Path | int]: + return asdict(self) + + def to_dict_without_metadata(self) -> dict[str, str | Path | int]: + d = asdict(self) + d.pop("module_name", None) + d.pop("hash", None) + d.pop("url", None) + return d class DataForTestLink_JSON_Encoder(json.JSONEncoder): @@ -60,6 +74,9 @@ def DataForTestLink_JSON_Decoder(d: dict[str, Any]) -> DataForTestLink | dict[st "line", "need", "verify_type", + "module_name", + "hash", + "url", "result", "result_text", } <= d.keys(): @@ -68,6 +85,9 @@ def DataForTestLink_JSON_Decoder(d: dict[str, Any]) -> DataForTestLink | dict[st file=Path(d["file"]), line=d["line"], need=d["need"], + module_name=d.get("module_name", ""), + hash=d.get("hash", ""), + url=d.get("url", ""), verify_type=d["verify_type"], result=d["result"], result_text=d["result_text"], @@ -83,6 +103,9 @@ class DataOfTestCase: file: str | None = None line: str | None = None result: str | None = None # passed | falied | skipped | disabled + module_name: str | None = None + hash: str | None = None + url: str | None = None # Intentionally not snakecase to make dict parsing simple TestType: str | None = None DerivationTechnique: str | None = None @@ -98,6 +121,9 @@ def from_dict(cls, data: dict[str, Any]): # type-ignore file=data.get("file"), line=data.get("line"), result=data.get("result"), + 
module_name=data.get("module_name"), + hash=data.get("hash"), + url=data.get("url"), TestType=data.get("TestType"), DerivationTechnique=data.get("DerivationTechnique"), result_text=data.get("result_text"), @@ -158,6 +184,8 @@ def is_valid(self) -> bool: # and self.TestType is not None # and self.DerivationTechnique is not None # ): + # Hash & URL are explicitly allowed to be empty but not None. + # module_name has to be always filled or something went wrong fields = [ x for x in self.__dataclass_fields__ @@ -199,6 +227,9 @@ def parse_attributes(verify_field: str | None, verify_type: str): assert self.file is not None assert self.line is not None assert self.result is not None + assert self.module_name is not None + assert self.hash is not None + assert self.url is not None assert self.result_text is not None assert self.TestType is not None assert self.DerivationTechnique is not None @@ -212,6 +243,9 @@ def parse_attributes(verify_field: str | None, verify_type: str): verify_type=verify_type, result=self.result, result_text=self.result_text, + module_name=self.module_name, + hash=self.hash, + url=self.url, ) return list( diff --git a/src/extensions/score_source_code_linker/tests/test_codelink.py b/src/extensions/score_source_code_linker/tests/test_codelink.py index 29ddc7235..9615c0058 100644 --- a/src/extensions/score_source_code_linker/tests/test_codelink.py +++ b/src/extensions/score_source_code_linker/tests/test_codelink.py @@ -43,7 +43,7 @@ from src.helper_lib import ( get_current_git_hash, ) -from src.helper_lib.additional_functions import get_github_link +from src.extensions.score_source_code_linker.helpers import get_github_link """ # ────────────────ATTENTION─────────────── diff --git a/src/extensions/score_source_code_linker/tests/test_source_code_link_integration.py b/src/extensions/score_source_code_linker/tests/test_source_code_link_integration.py index 60bb98f80..01bb7ff58 100644 ---
a/src/extensions/score_source_code_linker/tests/test_source_code_link_integration.py +++ b/src/extensions/score_source_code_linker/tests/test_source_code_link_integration.py @@ -37,7 +37,8 @@ SourceCodeLinks_TEST_JSON_Decoder, ) from src.helper_lib import find_ws_root, get_github_base_url -from src.helper_lib.additional_functions import get_github_link + +from src.extensions.score_source_code_linker.helpers import get_github_link @pytest.fixture() diff --git a/src/extensions/score_source_code_linker/xml_parser.py b/src/extensions/score_source_code_linker/xml_parser.py index 8432e1fc3..6facb983b 100644 --- a/src/extensions/score_source_code_linker/xml_parser.py +++ b/src/extensions/score_source_code_linker/xml_parser.py @@ -33,18 +33,41 @@ from sphinx_needs import logging from sphinx_needs.api import add_external_need +from src.extensions.score_source_code_linker.needlinks import ( + MetaData, +) +from src.extensions.score_source_code_linker.module_source_links import ModuleInfo from src.extensions.score_source_code_linker.testlink import ( DataOfTestCase, store_data_of_test_case_json, store_test_xml_parsed_json, ) from src.helper_lib import find_ws_root -from src.helper_lib.additional_functions import get_github_link +from src.extensions.score_source_code_linker.helpers import ( + get_github_link, + parse_info_from_known_good, + parse_module_name_from_path, +) logger = logging.get_logger(__name__) logger.setLevel("DEBUG") +def get_metadata_from_test_path(filepath: Path) -> MetaData: + known_good_json = os.environ.get("KNOWN_GOOD_JSON") + module_name = parse_module_name_from_path(filepath) + md: MetaData = { + "module_name": module_name, + "hash": "", + "url": "", + } + if module_name != "local_module" and known_good_json: + md["hash"], md["url"] = parse_info_from_known_good( + Path(known_good_json), module_name + ) + return md + + def parse_testcase_result(testcase: ET.Element) -> tuple[str, str]: """ Returns 'result' and 'result_text' found in the 'message' @@ 
-101,7 +124,7 @@ def read_test_xml_file(file: Path) -> tuple[list[DataOfTestCase], list[str], lis missing_prop_tests: list[str] = [] tree = ET.parse(file) root = tree.getroot() - + md = get_metadata_from_test_path(file) for testsuite in root.findall("testsuite"): for testcase in testsuite.findall("testcase"): case_properties = {} @@ -161,6 +184,7 @@ def read_test_xml_file(file: Path) -> tuple[list[DataOfTestCase], list[str], lis # If the is_valid method would return 'False' anyway. # I just can't think of it right now, leaving this for future me case_properties = parse_properties(case_properties, properties_element) + case_properties.update(md) test_case = DataOfTestCase.from_dict(case_properties) if not test_case.is_valid(): missing_prop_tests.append(testname) @@ -169,6 +193,7 @@ def read_test_xml_file(file: Path) -> tuple[list[DataOfTestCase], list[str], lis return test_case_needs, non_prop_tests, missing_prop_tests +# /home/maximilianp/score_personal/reference_integration/bazel-testlogs/external/score_docs_as_code+/src/helper_lib/helper_lib_tests/test.xml def find_xml_files(dir: Path) -> list[Path]: """ Recursively search all test.xml files inside 'bazel-testlogs' @@ -183,18 +208,21 @@ def find_xml_files(dir: Path) -> list[Path]: for root, _, files in os.walk(dir): if test_file_name in files: xml_paths.append(Path(os.path.join(root, test_file_name))) + print("=========================================") + print(xml_paths[0]) + print("=========================================") return xml_paths -def find_test_folder(base_path: Path | None = None) -> Path | None: +def find_test_folder(base_path: Path | None = None) -> tuple[Path | None, Path | None]: ws_root = base_path if base_path is not None else find_ws_root() assert ws_root is not None if os.path.isdir(ws_root / "tests-report"): - return ws_root / "tests-report" + return ws_root, ws_root / "tests-report" if os.path.isdir(ws_root / "bazel-testlogs"): - return ws_root / "bazel-testlogs" + return ws_root, ws_root 
/ "bazel-testlogs" logger.info("could not find tests-report or bazel-testlogs to parse testcases") - return None + return ws_root, None def run_xml_parser(app: Sphinx, env: BuildEnvironment): @@ -203,11 +231,19 @@ def run_xml_parser(app: Sphinx, env: BuildEnvironment): building testcase needs. It gets called from the source_code_linker __init__ """ - testlogs_dir = find_test_folder() + root_path, testlogs_dir = find_test_folder() # early return if testlogs_dir is None: return xml_file_paths = find_xml_files(testlogs_dir) + # scl_with_metadata = load_source_code_links_with_metadata_json( + # app.outdir / "score_source_links_metadata.json" + # )[0] + # metadata: MetaData = { + # "module_name": scl_with_metadata.module_name, + # "hash": scl_with_metadata.hash, + # "url": scl_with_metadata.url, + # } test_case_needs = build_test_needs_from_files(app, env, xml_file_paths) # Saving the test case needs for cache store_data_of_test_case_json( @@ -262,6 +298,10 @@ def construct_and_add_need(app: Sphinx, tn: DataOfTestCase): # and either 'Fully' or 'PartiallyVerifies' should not be None here assert tn.file is not None assert tn.name is not None + assert tn.module_name is not None + assert tn.hash is not None + assert tn.url is not None + metadata = ModuleInfo(name=tn.module_name, hash=tn.hash, url=tn.url) # IDK if this is ideal or not with contextlib.suppress(BaseException): _ = add_external_need( @@ -271,7 +311,7 @@ def construct_and_add_need(app: Sphinx, tn: DataOfTestCase): tags="TEST", id=f"testcase__{tn.name}_{short_hash(tn.file + tn.name).upper()}", name=tn.name, - external_url=get_github_link(tn), + external_url=get_github_link(metadata, tn), fully_verifies=tn.FullyVerifies if tn.FullyVerifies is not None else "", partially_verifies=tn.PartiallyVerifies if tn.PartiallyVerifies is not None diff --git a/src/helper_lib/BUILD b/src/helper_lib/BUILD index 748a2a730..ad6316363 100644 --- a/src/helper_lib/BUILD +++ b/src/helper_lib/BUILD @@ -27,8 +27,7 @@ py_library( 
visibility = ["//visibility:public"], deps = [ "@rules_python//python/runfiles", - "@score_docs_as_code//src/extensions/score_source_code_linker:source_code_linker_helpers", - ], + ] + all_requirements, ) score_py_pytest( diff --git a/src/helper_lib/__init__.py b/src/helper_lib/__init__.py index 5699e478d..a72fffb0b 100644 --- a/src/helper_lib/__init__.py +++ b/src/helper_lib/__init__.py @@ -17,7 +17,7 @@ from pathlib import Path from typing import Any -from runfiles import Runfiles +from python.runfiles import Runfiles from sphinx.config import Config from sphinx_needs.logging import get_logger From 996a51e2da92535bcd0f300c84f907b910b22395 Mon Sep 17 00:00:00 2001 From: MaximilianSoerenPollak Date: Fri, 13 Mar 2026 16:52:42 +0100 Subject: [PATCH 11/11] Formatting --- scripts_bazel/generate_sourcelinks_cli.py | 1 - scripts_bazel/merge_sourcelinks.py | 2 +- src/extensions/score_source_code_linker/__init__.py | 2 +- .../score_source_code_linker/module_source_links.py | 1 - src/extensions/score_source_code_linker/testlink.py | 1 - .../score_source_code_linker/tests/test_codelink.py | 2 +- .../tests/test_source_code_link_integration.py | 3 +-- .../score_source_code_linker/xml_parser.py | 12 ++++++------ 8 files changed, 10 insertions(+), 14 deletions(-) diff --git a/scripts_bazel/generate_sourcelinks_cli.py b/scripts_bazel/generate_sourcelinks_cli.py index e1feb8e0b..6f09ef075 100644 --- a/scripts_bazel/generate_sourcelinks_cli.py +++ b/scripts_bazel/generate_sourcelinks_cli.py @@ -31,7 +31,6 @@ store_source_code_links_with_metadata_json, ) - logging.basicConfig(level=logging.INFO, format="%(message)s") logger = logging.getLogger(__name__) diff --git a/scripts_bazel/merge_sourcelinks.py b/scripts_bazel/merge_sourcelinks.py index afb2cc198..932d91380 100644 --- a/scripts_bazel/merge_sourcelinks.py +++ b/scripts_bazel/merge_sourcelinks.py @@ -85,7 +85,7 @@ def main(): for d in data[1:]: d.update(metadata) assert isinstance(data, list), repr(data) - 
merged.extend(data[1:]) + merged.extend(data[1:]) with open(args.output, "w") as f: json.dump(merged, f, indent=2, ensure_ascii=False) diff --git a/src/extensions/score_source_code_linker/__init__.py b/src/extensions/score_source_code_linker/__init__.py index c687e2e3e..d80754036 100644 --- a/src/extensions/score_source_code_linker/__init__.py +++ b/src/extensions/score_source_code_linker/__init__.py @@ -34,6 +34,7 @@ from src.extensions.score_source_code_linker.generate_source_code_links_json import ( generate_source_code_links_json, ) +from src.extensions.score_source_code_linker.helpers import get_github_link from src.extensions.score_source_code_linker.module_source_links import ( group_needs_by_module, load_module_source_links_json, @@ -60,7 +61,6 @@ find_git_root, find_ws_root, ) -from src.extensions.score_source_code_linker.helpers import get_github_link LOGGER = get_logger(__name__) # Uncomment this to enable more verbose logging diff --git a/src/extensions/score_source_code_linker/module_source_links.py b/src/extensions/score_source_code_linker/module_source_links.py index 97dc27a68..b77450eb8 100644 --- a/src/extensions/score_source_code_linker/module_source_links.py +++ b/src/extensions/score_source_code_linker/module_source_links.py @@ -15,7 +15,6 @@ import json from dataclasses import asdict, dataclass, field from pathlib import Path -from re import M from typing import Any from src.extensions.score_source_code_linker.need_source_links import ( diff --git a/src/extensions/score_source_code_linker/testlink.py b/src/extensions/score_source_code_linker/testlink.py index f001cc203..bd7139271 100644 --- a/src/extensions/score_source_code_linker/testlink.py +++ b/src/extensions/score_source_code_linker/testlink.py @@ -30,7 +30,6 @@ from sphinx_needs import logging - LOGGER = logging.get_logger(__name__) diff --git a/src/extensions/score_source_code_linker/tests/test_codelink.py b/src/extensions/score_source_code_linker/tests/test_codelink.py index 
9615c0058..9d3fa78f5 100644 --- a/src/extensions/score_source_code_linker/tests/test_codelink.py +++ b/src/extensions/score_source_code_linker/tests/test_codelink.py @@ -35,6 +35,7 @@ get_cache_filename, group_by_need, ) +from src.extensions.score_source_code_linker.helpers import get_github_link from src.extensions.score_source_code_linker.needlinks import ( NeedLink, load_source_code_links_json, @@ -43,7 +44,6 @@ from src.helper_lib import ( get_current_git_hash, ) -from src.extensions.score_source_code_linker.helpers import get_github_link """ # ────────────────ATTENTION─────────────── diff --git a/src/extensions/score_source_code_linker/tests/test_source_code_link_integration.py b/src/extensions/score_source_code_linker/tests/test_source_code_link_integration.py index 01bb7ff58..b4e65279d 100644 --- a/src/extensions/score_source_code_linker/tests/test_source_code_link_integration.py +++ b/src/extensions/score_source_code_linker/tests/test_source_code_link_integration.py @@ -25,6 +25,7 @@ from sphinx.testing.util import SphinxTestApp from sphinx_needs.data import SphinxNeedsData +from src.extensions.score_source_code_linker.helpers import get_github_link from src.extensions.score_source_code_linker.needlinks import NeedLink from src.extensions.score_source_code_linker.testlink import ( DataForTestLink, @@ -38,8 +39,6 @@ ) from src.helper_lib import find_ws_root, get_github_base_url -from src.extensions.score_source_code_linker.helpers import get_github_link - @pytest.fixture() def sphinx_base_dir(tmp_path_factory: TempPathFactory) -> Path: diff --git a/src/extensions/score_source_code_linker/xml_parser.py b/src/extensions/score_source_code_linker/xml_parser.py index 6facb983b..123ebd5d8 100644 --- a/src/extensions/score_source_code_linker/xml_parser.py +++ b/src/extensions/score_source_code_linker/xml_parser.py @@ -33,21 +33,21 @@ from sphinx_needs import logging from sphinx_needs.api import add_external_need +from src.extensions.score_source_code_linker.helpers 
import ( + get_github_link, + parse_info_from_known_good, + parse_module_name_from_path, +) +from src.extensions.score_source_code_linker.module_source_links import ModuleInfo from src.extensions.score_source_code_linker.needlinks import ( MetaData, ) -from src.extensions.score_source_code_linker.module_source_links import ModuleInfo from src.extensions.score_source_code_linker.testlink import ( DataOfTestCase, store_data_of_test_case_json, store_test_xml_parsed_json, ) from src.helper_lib import find_ws_root -from src.extensions.score_source_code_linker.helpers import ( - get_github_link, - parse_info_from_known_good, - parse_module_name_from_path, -) logger = logging.get_logger(__name__) logger.setLevel("DEBUG")