diff --git a/README.md b/README.md index 5d56875..b586719 100644 --- a/README.md +++ b/README.md @@ -68,12 +68,12 @@ python3 -m google.adk.scope.extractors.python.extractor \ ``` -### Feature Matching +### Feature Matching & Reporting -Once you have extracted features from two languages (e.g., Python and TypeScript), you can compare them using the `match.sh` script. +Once you have extracted features from two languages (e.g., Python and TypeScript), you can compare them using the `report.sh` script. ```bash -./match.sh \ +./report.sh \ --base output/py.txtpb \ --target output/ts.txtpb \ --output output/ \ diff --git a/proto/features.proto b/proto/features.proto index de94a94..cf78926 100644 --- a/proto/features.proto +++ b/proto/features.proto @@ -15,6 +15,7 @@ enum ParamType { MAP = 5; SET = 6; UNKNOWN = 7; + NULL = 8; } @@ -60,7 +61,7 @@ message Feature { repeated string original_return_types = 12; // Raw returns (e.g., "Future"). repeated string normalized_return_types = 13; // Canonical returns (e.g., "STRING"). - + optional bool async = 14; // true if it is an async call. } diff --git a/match.sh b/report.sh similarity index 96% rename from match.sh rename to report.sh index 970f5b1..e5cd771 100755 --- a/match.sh +++ b/report.sh @@ -76,7 +76,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" export PYTHONPATH="${SCRIPT_DIR}/src:${PYTHONPATH}" # Run the python matcher -python3 "${SCRIPT_DIR}/src/google/adk/scope/matcher/matcher.py" \ +python3 "${SCRIPT_DIR}/src/google/adk/scope/reporter/reporter.py" \ --base "${BASE_FILE}" \ --target "${TARGET_FILE}" \ --output "${FULL_OUTPUT_PATH}" \ diff --git a/run.sh b/run.sh index 27e4b81..437791a 100755 --- a/run.sh +++ b/run.sh @@ -5,10 +5,10 @@ echo "Extracting TypeScript features..." ./extract.sh --language typescript --input-repo ../adk-js ./output echo "Generating symmetric reports..." 
-./match.sh --base output/py.txtpb --target output/ts.txtpb --output ./output --report-type symmetric +./report.sh --base output/py.txtpb --target output/ts.txtpb --output ./output --report-type symmetric echo "Generating directional reports.. ." -./match.sh --base output/py.txtpb --target output/ts.txtpb --output ./output --report-type directional +./report.sh --base output/py.txtpb --target output/ts.txtpb --output ./output --report-type directional echo "Generating raw reports..." -./match.sh --base output/py.txtpb --target output/ts.txtpb --output ./output --report-type raw \ No newline at end of file +./report.sh --base output/py.txtpb --target output/ts.txtpb --output ./output --report-type raw \ No newline at end of file diff --git a/src/google/adk/scope/extractors/converter_py.py b/src/google/adk/scope/extractors/converter_py.py index 16bc07b..8298e42 100644 --- a/src/google/adk/scope/extractors/converter_py.py +++ b/src/google/adk/scope/extractors/converter_py.py @@ -382,11 +382,9 @@ def _process_param_node(self, node: Node) -> Optional[feature_pb2.Param]: # Protocol Buffer enums don't have NULL usually. # Let's drop "null" from the enum list for now, or map to # UNKNOWN if forced. - if s == "null": - continue - try: - enum_val = getattr(feature_pb2.ParamType, s) + # s is lowercase from normalizer, enum is uppercase + enum_val = getattr(feature_pb2.ParamType, s.upper()) normalized_enums.append(enum_val) except AttributeError: # Fallback to OBJECT or UNKNOWN? 
diff --git a/src/google/adk/scope/extractors/converter_ts.py b/src/google/adk/scope/extractors/converter_ts.py index 26d956c..7f900dd 100644 --- a/src/google/adk/scope/extractors/converter_ts.py +++ b/src/google/adk/scope/extractors/converter_ts.py @@ -566,7 +566,7 @@ def _create_single_param( normalized_enums = [] for s in normalized_strings: try: - enum_val = getattr(feature_pb2.ParamType, s) + enum_val = getattr(feature_pb2.ParamType, s.upper()) normalized_enums.append(enum_val) except AttributeError: normalized_enums.append(feature_pb2.ParamType.OBJECT) @@ -613,7 +613,8 @@ def _extract_return_types(self, node: Node) -> Tuple[List[str], List[str]]: # logically T for async? # Schema says "original_return_types". # normalized usually unwrap? - return [raw], self.normalizer.normalize(raw, "typescript") + normalized = self.normalizer.normalize(raw, "typescript") + return [raw], normalized return [], [] def _is_blocking(self, node: Node, return_types: List[str]) -> bool: diff --git a/src/google/adk/scope/features_pb2.py b/src/google/adk/scope/features_pb2.py index 16b51e1..44ab9b1 100644 --- a/src/google/adk/scope/features_pb2.py +++ b/src/google/adk/scope/features_pb2.py @@ -19,7 +19,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x0e\x66\x65\x61tures.proto\x12\x0fgoogle.adk.meta"\xc4\x01\n\x05Param\x12\x15\n\roriginal_name\x18\x01 \x01(\t\x12\x17\n\x0fnormalized_name\x18\x02 \x01(\t\x12\x16\n\x0eoriginal_types\x18\x03 \x03(\t\x12\x34\n\x10normalized_types\x18\x04 \x03(\x0e\x32\x1a.google.adk.meta.ParamType\x12\x18\n\x0b\x64\x65scription\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x0bis_optional\x18\x06 \x01(\x08\x42\x0e\n\x0c_description"\xdc\x04\n\x07\x46\x65\x61ture\x12\x15\n\roriginal_name\x18\x01 \x01(\t\x12\x17\n\x0fnormalized_name\x18\x02 \x01(\t\x12\x18\n\x0b\x64\x65scription\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x11\n\tmember_of\x18\x04 \x01(\t\x12\x1c\n\x14normalized_member_of\x18\x05 \x01(\t\x12\x38\n\x08maturity\x18\x06 
\x01(\x0e\x32!.google.adk.meta.Feature.MaturityH\x01\x88\x01\x01\x12+\n\x04type\x18\x07 \x01(\x0e\x32\x1d.google.adk.meta.Feature.Type\x12\x11\n\tfile_path\x18\x08 \x01(\t\x12\x11\n\tnamespace\x18\t \x01(\t\x12\x1c\n\x14normalized_namespace\x18\n \x01(\t\x12*\n\nparameters\x18\x0b \x03(\x0b\x32\x16.google.adk.meta.Param\x12\x1d\n\x15original_return_types\x18\x0c \x03(\t\x12\x1f\n\x17normalized_return_types\x18\r \x03(\t\x12\x12\n\x05\x61sync\x18\x0e \x01(\x08H\x02\x88\x01\x01"6\n\x08Maturity\x12\x10\n\x0c\x45XPERIMENTAL\x10\x00\x12\x08\n\x04\x42\x45TA\x10\x01\x12\x0e\n\nDEPRECATED\x10\x02"L\n\x04Type\x12\x0c\n\x08\x46UNCTION\x10\x00\x12\x13\n\x0fINSTANCE_METHOD\x10\x01\x12\x10\n\x0c\x43LASS_METHOD\x10\x02\x12\x0f\n\x0b\x43ONSTRUCTOR\x10\x03\x42\x0e\n\x0c_descriptionB\x0b\n\t_maturityB\x08\n\x06_async"`\n\x0f\x46\x65\x61tureRegistry\x12\x10\n\x08language\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12*\n\x08\x66\x65\x61tures\x18\x03 \x03(\x0b\x32\x18.google.adk.meta.Feature*e\n\tParamType\x12\n\n\x06OBJECT\x10\x00\x12\n\n\x06STRING\x10\x01\x12\n\n\x06NUMBER\x10\x02\x12\x0b\n\x07\x42OOLEAN\x10\x03\x12\x08\n\x04LIST\x10\x04\x12\x07\n\x03MAP\x10\x05\x12\x07\n\x03SET\x10\x06\x12\x0b\n\x07UNKNOWN\x10\x07\x62\x06proto3' + b'\n\x0e\x66\x65\x61tures.proto\x12\x0fgoogle.adk.meta"\xc4\x01\n\x05Param\x12\x15\n\roriginal_name\x18\x01 \x01(\t\x12\x17\n\x0fnormalized_name\x18\x02 \x01(\t\x12\x16\n\x0eoriginal_types\x18\x03 \x03(\t\x12\x34\n\x10normalized_types\x18\x04 \x03(\x0e\x32\x1a.google.adk.meta.ParamType\x12\x18\n\x0b\x64\x65scription\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x0bis_optional\x18\x06 \x01(\x08\x42\x0e\n\x0c_description"\xdc\x04\n\x07\x46\x65\x61ture\x12\x15\n\roriginal_name\x18\x01 \x01(\t\x12\x17\n\x0fnormalized_name\x18\x02 \x01(\t\x12\x18\n\x0b\x64\x65scription\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x11\n\tmember_of\x18\x04 \x01(\t\x12\x1c\n\x14normalized_member_of\x18\x05 \x01(\t\x12\x38\n\x08maturity\x18\x06 
\x01(\x0e\x32!.google.adk.meta.Feature.MaturityH\x01\x88\x01\x01\x12+\n\x04type\x18\x07 \x01(\x0e\x32\x1d.google.adk.meta.Feature.Type\x12\x11\n\tfile_path\x18\x08 \x01(\t\x12\x11\n\tnamespace\x18\t \x01(\t\x12\x1c\n\x14normalized_namespace\x18\n \x01(\t\x12*\n\nparameters\x18\x0b \x03(\x0b\x32\x16.google.adk.meta.Param\x12\x1d\n\x15original_return_types\x18\x0c \x03(\t\x12\x1f\n\x17normalized_return_types\x18\r \x03(\t\x12\x12\n\x05\x61sync\x18\x0e \x01(\x08H\x02\x88\x01\x01"6\n\x08Maturity\x12\x10\n\x0c\x45XPERIMENTAL\x10\x00\x12\x08\n\x04\x42\x45TA\x10\x01\x12\x0e\n\nDEPRECATED\x10\x02"L\n\x04Type\x12\x0c\n\x08\x46UNCTION\x10\x00\x12\x13\n\x0fINSTANCE_METHOD\x10\x01\x12\x10\n\x0c\x43LASS_METHOD\x10\x02\x12\x0f\n\x0b\x43ONSTRUCTOR\x10\x03\x42\x0e\n\x0c_descriptionB\x0b\n\t_maturityB\x08\n\x06_async"`\n\x0f\x46\x65\x61tureRegistry\x12\x10\n\x08language\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12*\n\x08\x66\x65\x61tures\x18\x03 \x03(\x0b\x32\x18.google.adk.meta.Feature*o\n\tParamType\x12\n\n\x06OBJECT\x10\x00\x12\n\n\x06STRING\x10\x01\x12\n\n\x06NUMBER\x10\x02\x12\x0b\n\x07\x42OOLEAN\x10\x03\x12\x08\n\x04LIST\x10\x04\x12\x07\n\x03MAP\x10\x05\x12\x07\n\x03SET\x10\x06\x12\x0b\n\x07UNKNOWN\x10\x07\x12\x08\n\x04NULL\x10\x08\x62\x06proto3' ) _globals = globals() @@ -28,7 +28,7 @@ if not _descriptor._USE_C_DESCRIPTORS: DESCRIPTOR._loaded_options = None _globals["_PARAMTYPE"]._serialized_start = 939 - _globals["_PARAMTYPE"]._serialized_end = 1040 + _globals["_PARAMTYPE"]._serialized_end = 1050 _globals["_PARAM"]._serialized_start = 36 _globals["_PARAM"]._serialized_end = 232 _globals["_FEATURE"]._serialized_start = 235 diff --git a/src/google/adk/scope/matcher/matcher.py b/src/google/adk/scope/matcher/matcher.py index bfc1b96..40ba4c1 100644 --- a/src/google/adk/scope/matcher/matcher.py +++ b/src/google/adk/scope/matcher/matcher.py @@ -1,30 +1,18 @@ -import argparse -import dataclasses -import logging -import sys from collections import defaultdict -from 
pathlib import Path from typing import Dict, List, Tuple import numpy as np -from google.protobuf import text_format +from jellyfish import jaro_winkler_similarity from scipy.optimize import linear_sum_assignment from google.adk.scope import features_pb2 -from google.adk.scope.utils import args as adk_args from google.adk.scope.utils import stats from google.adk.scope.utils.similarity import SimilarityScorer _NEAR_MISS_THRESHOLD = 0.15 -@dataclasses.dataclass -class MatchResult: - master_content: str - module_files: Dict[str, str] # filename -> content - - -def format_feature(f: features_pb2.Feature) -> str: +def _format_feature(f: features_pb2.Feature) -> str: name = f.original_name or f.normalized_name member = f.member_of if member and member.lower() != "null": @@ -44,7 +32,7 @@ def get_type_display_name(f: features_pb2.Feature) -> str: return "unknown" -def get_type_priority(f: features_pb2.Feature) -> int: +def _get_type_priority(f: features_pb2.Feature) -> int: """Returns priority: constructor < function < method < unknown.""" type_name = get_type_display_name(f) priorities = { @@ -56,14 +44,6 @@ def get_type_priority(f: features_pb2.Feature) -> int: return priorities.get(type_name, 99) -def read_feature_registry(file_path: str) -> features_pb2.FeatureRegistry: - """Reads a FeatureRegistry from a text proto file.""" - registry = features_pb2.FeatureRegistry() - with open(file_path, "rb") as f: - text_format.Parse(f.read(), registry) - return registry - - def match_features( base_features: List[features_pb2.Feature], target_features: List[features_pb2.Feature], @@ -112,38 +92,11 @@ def match_features( return matches -def get_language_code(language_name: str) -> str: - """Returns a short code for the language.""" - name = language_name.upper() - if name == "PYTHON": - return "py" - elif name == "TYPESCRIPT": - return "ts" - elif name == "JAVA": - return "java" - elif name == "GOLANG": - return "go" - else: - return name[:2].lower() - - -def 
_group_features_by_module( - registry: features_pb2.FeatureRegistry, -) -> Dict[str, List[features_pb2.Feature]]: - """Groups features by their module.""" - features = defaultdict(list) - for f in registry.features: - key = f.normalized_namespace or f.namespace or "Unknown Module" - features[key].append(f) - return features - - -def _fuzzy_match_namespaces( +def fuzzy_match_namespaces( features_base: Dict[str, List[features_pb2.Feature]], features_target: Dict[str, List[features_pb2.Feature]], ) -> None: """Remaps target namespaces to base namespaces using fuzzy matching.""" - from jellyfish import jaro_winkler_similarity base_namespaces = sorted(list(features_base.keys())) remapped_features = defaultdict(list, {k: [] for k in features_base}) @@ -174,249 +127,7 @@ def _fuzzy_match_namespaces( features_target.update(remapped_features) -def match_registries( - base_registry: features_pb2.FeatureRegistry, - target_registry: features_pb2.FeatureRegistry, - alpha: float, - report_type: str = "symmetric", -) -> MatchResult: - """Matches features and generates a master report + module sub-reports.""" - - features_base = _group_features_by_module(base_registry) - features_target = _group_features_by_module(target_registry) - _fuzzy_match_namespaces(features_base, features_target) - - if report_type == "directional": - all_modules = sorted(features_base.keys()) - else: - all_modules = sorted( - set(features_base.keys()) | set(features_target.keys()) - ) - - if report_type == "raw": - return _generate_raw_report( - base_registry, - target_registry, - all_modules, - features_base, - features_target, - alpha, - ) - - return _generate_markdown_report( - base_registry, - target_registry, - all_modules, - features_base, - features_target, - alpha, - report_type, - ) - - -def _generate_raw_report( - base_registry: features_pb2.FeatureRegistry, - target_registry: features_pb2.FeatureRegistry, - all_modules: List[str], - features_base: Dict[str, List[features_pb2.Feature]], - 
features_target: Dict[str, List[features_pb2.Feature]], - alpha: float, -) -> MatchResult: - """Generates a raw CSV report.""" - base_code = get_language_code(base_registry.language) - target_code = get_language_code(target_registry.language) - csv_header = ( - f"{base_code}_namespace,{base_code}_member_of,{base_code}_name," - f"{target_code}_namespace,{target_code}_member_of,{target_code}_name," - "type,score" - ) - csv_lines = [csv_header] - - def get_feature_cols(f: features_pb2.Feature) -> tuple[str, str, str]: - ns = f.namespace or "" - if not ns and f.normalized_namespace: - ns = f.normalized_namespace - - mem = f.member_of or "" - if not mem and f.normalized_member_of: - mem = f.normalized_member_of - if mem.lower() == "null": - mem = "" - - name = f.original_name or f.normalized_name or "" - return ns, mem, name - - def escape_csv(s): - if s is None: - return "" - if "," in s or '"' in s or "\n" in s: - return '"{}"'.format(s.replace('"', '""')) - return s - - for module in all_modules: - base_list = features_base.get(module, []) - target_list = features_target.get(module, []) - - solid_matches = match_features(base_list, target_list, alpha) - beta = max(0.0, alpha - _NEAR_MISS_THRESHOLD) - potential_matches = match_features(base_list, target_list, beta) - - unmatched_base = base_list - unmatched_target = target_list - - for f_base, f_target, score in solid_matches: - b_ns, b_mem, b_name = get_feature_cols(f_base) - t_ns, t_mem, t_name = get_feature_cols(f_target) - f_type = get_type_display_name(f_base) - csv_lines.append( - f"{escape_csv(b_ns)},{escape_csv(b_mem)},{escape_csv(b_name)}," - f"{escape_csv(t_ns)},{escape_csv(t_mem)},{escape_csv(t_name)}," - f"{escape_csv(f_type)},{score:.4f}" - ) - - for f_base, f_target, score in potential_matches: - b_ns, b_mem, b_name = get_feature_cols(f_base) - t_ns, t_mem, t_name = get_feature_cols(f_target) - f_type = get_type_display_name(f_base) - csv_lines.append( - 
f"{escape_csv(b_ns)},{escape_csv(b_mem)},{escape_csv(b_name)}," - f"{escape_csv(t_ns)},{escape_csv(t_mem)},{escape_csv(t_name)}," - f"{escape_csv(f_type)},{score:.4f}" - ) - - for f_base in unmatched_base: - b_ns, b_mem, b_name = get_feature_cols(f_base) - f_type = get_type_display_name(f_base) - csv_lines.append( - f"{escape_csv(b_ns)},{escape_csv(b_mem)},{escape_csv(b_name)}," - f",,,{escape_csv(f_type)},0.0000" - ) - - for f_target in unmatched_target: - t_ns, t_mem, t_name = get_feature_cols(f_target) - f_type = get_type_display_name(f_target) - csv_lines.append( - f",,,{escape_csv(t_ns)},{escape_csv(t_mem)}," - f"{escape_csv(t_name)},{escape_csv(f_type)},0.0000" - ) - - return MatchResult(master_content="\n".join(csv_lines), module_files={}) - - -def _generate_markdown_report( - base_registry: features_pb2.FeatureRegistry, - target_registry: features_pb2.FeatureRegistry, - all_modules: List[str], - features_base: Dict[str, List[features_pb2.Feature]], - features_target: Dict[str, List[features_pb2.Feature]], - alpha: float, - report_type: str, -) -> MatchResult: - """Generates a markdown report.""" - from datetime import datetime - - master_lines = [] - title_suffix = "Symmetric" if report_type == "symmetric" else "Directional" - master_lines.extend( - [ - f"# Feature Matching Report: {title_suffix}", - f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", - "", - f"**Base:** {base_registry.language} ({base_registry.version})", - f"**Target:** {target_registry.language}" - f" ({target_registry.version})", - ] - ) - - global_score_idx = len(master_lines) - master_lines.append("GLOBAL_SCORE_PLACEHOLDER") - master_lines.append("") - - header = "| Module | Features (Base) | Score | Status | Details |" - divider = "|---|---|---|---|---|" - if report_type == "symmetric": - header = "| ADK | Module | Features (Base) | Score | Status | Details |" - divider = "|---|---|---|---|---|---|" - master_lines.extend(["## Module Summary", header, divider]) - - module_files = 
{} - module_rows = [] - total_solid_matches = 0 - - base_code = get_language_code(base_registry.language) - target_code = get_language_code(target_registry.language) - - for module in all_modules: - mod_base_list = features_base.get(module, []) - mod_target_list = features_target.get(module, []) - - results = _process_module( - module, - mod_base_list, - mod_target_list, - alpha, - report_type, - base_code, - target_code, - ) - total_solid_matches += results["solid_matches_count"] - module_rows.append((results["score"], results["row_content"])) - if results.get("module_filename"): - module_files[results["module_filename"]] = results["module_content"] - - module_rows.sort(key=lambda x: x[0], reverse=True) - master_lines.extend([row for _, row in module_rows]) - - total_base_features = len(base_registry.features) - total_target_features = len(target_registry.features) - if report_type == "symmetric": - union_size = ( - total_base_features + total_target_features - total_solid_matches - ) - parity_score = ( - total_solid_matches / union_size if union_size > 0 else 1.0 - ) - global_stats = ( - f"**Jaccard Index:** {parity_score:.2%}\n\n" - "> The Jaccard Index measures the similarity between the " - "two feature sets. A score of 100% indicates that both languages " - "have identical features." - ) - else: - precision = stats.calculate_precision( - total_solid_matches, total_target_features - ) - recall = stats.calculate_recall( - total_solid_matches, total_base_features - ) - parity_score = stats.calculate_f1(precision, recall) - - global_stats = ( - "\n| Metric | Score |\n" - "|---|---|\n" - f"| **Precision** | {precision:.2%} |\n" - f"| **Recall** | {recall:.2%} |\n" - f"| **F1 Score** | {parity_score:.2%} |\n\n" - "> **Precision**: Of all features in the target, how many are " - "correct matches to the base? (High score = low number of extra " - "features in target)\n\n" - "> **Recall**: Of all features in the base, how many were found in " - "the target? 
(High score = low number of missing features in " - "target)\n\n" - "> **F1 Score**: A weighted average of Precision and Recall, " - "providing a single measure of how well the target feature set " - "matches the base." - ) - - master_lines[global_score_idx] = global_stats - - return MatchResult( - master_content="\n".join(master_lines).strip(), - module_files=module_files, - ) - - -def _process_module( +def process_module( module: str, base_list: List[features_pb2.Feature], target_list: List[features_pb2.Feature], @@ -495,10 +206,10 @@ def _process_module( mod_lines.extend(["", f"**Features:** {mod_total_features}", ""]) solid_matches.sort( - key=lambda x: (get_type_priority(x[0]), x[0].normalized_name) + key=lambda x: (_get_type_priority(x[0]), x[0].normalized_name) ) potential_matches.sort( - key=lambda x: (get_type_priority(x[0]), x[0].normalized_name) + key=lambda x: (_get_type_priority(x[0]), x[0].normalized_name) ) if solid_matches: @@ -515,8 +226,8 @@ def _process_module( mod_lines.extend( [ f"| {get_type_display_name(f_base)} |" - f" `{format_feature(f_base)}`" - f" | `{format_feature(f_target)}` | {score:.2f} |" + f" `{_format_feature(f_base)}`" + f" | `{_format_feature(f_target)}` | {score:.2f} |" for f_base, f_target, score in solid_matches ] ) @@ -534,8 +245,8 @@ def _process_module( mod_lines.extend( [ f"| {get_type_display_name(f_base)} |" - f" `{format_feature(f_base)}`" - f" | `{format_feature(f_target)}` | {score:.2f} |" + f" `{_format_feature(f_base)}`" + f" | `{_format_feature(f_target)}` | {score:.2f} |" for f_base, f_target, score in potential_matches ] ) @@ -550,17 +261,19 @@ def _process_module( ] ) mod_lines.extend( - [f"| `{format_feature(f)}` | Target |" for f in unmatched_base] + [f"| `{_format_feature(f)}` | Target |" for f in unmatched_base] ) mod_lines.extend( - [f"| `{format_feature(f)}` | Base |" for f in unmatched_target] + [f"| `{_format_feature(f)}` | Base |" for f in unmatched_target] ) mod_lines.append("") elif report_type == 
"directional" and unmatched_base: mod_lines.extend( ["### ❌ Missing in Target", "| Missing Feature |", "|---|"] ) - mod_lines.extend([f"| `{format_feature(f)}` |" for f in unmatched_base]) + mod_lines.extend( + [f"| `{_format_feature(f)}` |" for f in unmatched_base] + ) mod_lines.append("") return { @@ -570,101 +283,3 @@ def _process_module( "module_filename": module_filename, "module_content": "\n".join(mod_lines).strip(), } - - -def main(): - parser = argparse.ArgumentParser( - description="Match ADK features between two languages." - ) - parser.add_argument( - "--base", - required=True, - help="Path to the base FeatureRegistry .txtpb file.", - ) - parser.add_argument( - "--target", - required=True, - help="Path to the target FeatureRegistry .txtpb file.", - ) - parser.add_argument( - "--output", - required=True, - help="Path to save the Markdown report.", - ) - parser.add_argument( - "--alpha", - type=float, - default=0.8, - help="Similarity threshold (0.0 to 1.0) defaults to 0.8.", - ) - parser.add_argument( - "--report-type", - choices=["symmetric", "directional", "raw"], - default="symmetric", - help="Type of gap report to generate (symmetric, directional, or raw).", - ) - adk_args.add_verbose_argument(parser) - args = parser.parse_args() - adk_args.configure_logging(args) - - try: - base_registry = read_feature_registry(args.base) - target_registry = read_feature_registry(args.target) - except Exception as e: - logging.error(f"Error reading feature registries: {e}") - sys.exit(1) - - result = match_registries( - base_registry, target_registry, args.alpha, args.report_type - ) - - output_path = Path(args.output) - output_path.parent.mkdir(parents=True, exist_ok=True) - - if args.report_type == "raw": - # Raw report is a single file, no modules directory needed - try: - output_path.parent.mkdir(parents=True, exist_ok=True) - output_path.write_text(result.master_content) - logging.info( - f"Successfully wrote raw match report to {output_path}" - ) - except 
Exception as e: - logging.error(f"Error writing raw report to {output_path}: {e}") - sys.exit(1) - return - - # Create module directory - if result.module_files: - modules_dir_name = f"{output_path.stem}_modules" - modules_dir = output_path.parent / modules_dir_name - modules_dir.mkdir(parents=True, exist_ok=True) - - # Write module files - for filename, content in result.module_files.items(): - # Replace placeholder for master report link - # The link is relative from module dir to master report - # So name is enough. - final_content = content.replace("{master_report}", output_path.name) - (modules_dir / filename).write_text(final_content) - - # Replace placeholder in Master Report - # We assume master report is in parent of modules_dir - # modules_dir relative to master report is just the dir name - master_report = result.master_content.replace( - "{modules_dir}", modules_dir_name - ) - else: - master_report = result.master_content.replace("{modules_dir}", ".") - - try: - output_path.parent.mkdir(parents=True, exist_ok=True) - output_path.write_text(master_report) - logging.info(f"Successfully wrote match report to {output_path}") - except Exception as e: - logging.error(f"Error writing report to {output_path}: {e}") - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/src/google/adk/scope/reporter/__init__.py b/src/google/adk/scope/reporter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/google/adk/scope/reporter/reporter.py b/src/google/adk/scope/reporter/reporter.py new file mode 100644 index 0000000..4044a8b --- /dev/null +++ b/src/google/adk/scope/reporter/reporter.py @@ -0,0 +1,471 @@ +import argparse +import dataclasses +import logging +import sys +from collections import defaultdict +from datetime import datetime +from pathlib import Path +from typing import Dict, List + +from google.protobuf import text_format + +from google.adk.scope import features_pb2 +from google.adk.scope.matcher import matcher +from 
google.adk.scope.utils import args as adk_args +from google.adk.scope.utils import stats + +_NEAR_MISS_THRESHOLD = 0.15 + + +@dataclasses.dataclass +class MatchResult: +  master_content: str +  module_files: Dict[str, str]  # filename -> content + + +def _group_features_by_module( +    registry: features_pb2.FeatureRegistry, +) -> Dict[str, List[features_pb2.Feature]]: +  """Groups features by their module.""" +  features = defaultdict(list) +  for f in registry.features: +    key = f.normalized_namespace or f.namespace or "Unknown Module" +    features[key].append(f) +  return features + + +def _get_language_code(language_name: str) -> str: +  """Returns a short code for the language.""" +  name = language_name.upper() +  if name in {"PYTHON", "PY"}: +    return "py" +  elif name in {"TYPESCRIPT", "TS"}: +    return "ts" +  elif name == "JAVA": +    return "java" +  elif name in {"GOLANG", "GO"}: +    return "go" +  else: +    return name.lower() + + +def _read_feature_registry(file_path: str) -> features_pb2.FeatureRegistry: +  """Reads a FeatureRegistry from a text proto file.""" +  registry = features_pb2.FeatureRegistry() +  with open(file_path, "rb") as f: +    text_format.Parse(f.read(), registry) +  return registry + + +def match_registries( +    base_registry: features_pb2.FeatureRegistry, +    target_registry: features_pb2.FeatureRegistry, +    alpha: float, +    report_type: str = "symmetric", +) -> MatchResult: +  """Matches features and generates a master report + module sub-reports.""" +  reporter = ReportGenerator( +      base_registry, +      target_registry, +      alpha, +  ) + +  return reporter.generate_report(report_type) + + +class ReportGenerator: +  def __init__( +      self, +      base_registry: features_pb2.FeatureRegistry, +      target_registry: features_pb2.FeatureRegistry, +      alpha: float, +  ): +    self.base_registry = base_registry +    self.target_registry = target_registry + +    self.features_base = _group_features_by_module(base_registry) +    self.features_target = _group_features_by_module(target_registry) + 
matcher.fuzzy_match_namespaces(self.features_base, self.features_target) + self.alpha = alpha + + def generate_report(self, report_type) -> MatchResult: + """Generates report.""" + if report_type == "raw": + return self.generate_raw_report() + elif report_type == "directional": + return self.generate_directional_report() + elif report_type == "symmetric": + return self.generate_symmetric_report() + else: + raise ValueError(f"Unknown report type: {report_type}") + + def generate_raw_report(self) -> MatchResult: + """Generates a raw CSV report.""" + base_code = _get_language_code(self.base_registry.language) + target_code = _get_language_code(self.target_registry.language) + all_modules = sorted( + set(self.features_base.keys()) | set(self.features_target.keys()) + ) + csv_header = ( + f"{base_code}_namespace,{base_code}_member_of,{base_code}_name," + f"{target_code}_namespace,{target_code}_member_of,{target_code}_name," + "type,score" + ) + csv_lines = [csv_header] + + def get_feature_cols(f: features_pb2.Feature) -> tuple[str, str, str]: + ns = f.namespace or "" + if not ns and f.normalized_namespace: + ns = f.normalized_namespace + + mem = f.member_of or "" + if not mem and f.normalized_member_of: + mem = f.normalized_member_of + if mem.lower() == "null": + mem = "" + + name = f.original_name or f.normalized_name or "" + return ns, mem, name + + def esc_csv(s): + if s is None: + return "" + if "," in s or '"' in s or "\n" in s: + return '"{}"'.format(s.replace('"', '""')) + return s + + for module in all_modules: + base_list = self.features_base.get(module, []) + target_list = self.features_target.get(module, []) + + solid_matches = matcher.match_features( + base_list, target_list, self.alpha + ) + beta = max(0.0, self.alpha - _NEAR_MISS_THRESHOLD) + potential_matches = matcher.match_features( + base_list, target_list, beta + ) + + unmatched_base = list(base_list) + unmatched_target = list(target_list) + + for f_base, f_target, score in solid_matches: + b_ns, 
b_mem, b_name = get_feature_cols(f_base) + t_ns, t_mem, t_name = get_feature_cols(f_target) + f_type = matcher.get_type_display_name(f_base) + csv_lines.append( + f"{esc_csv(b_ns)},{esc_csv(b_mem)},{esc_csv(b_name)}," + f"{esc_csv(t_ns)},{esc_csv(t_mem)},{esc_csv(t_name)}," + f"{esc_csv(f_type)},{score:.4f}" + ) + + for f_base, f_target, score in potential_matches: + b_ns, b_mem, b_name = get_feature_cols(f_base) + t_ns, t_mem, t_name = get_feature_cols(f_target) + f_type = matcher.get_type_display_name(f_base) + csv_lines.append( + f"{esc_csv(b_ns)},{esc_csv(b_mem)},{esc_csv(b_name)}," + f"{esc_csv(t_ns)},{esc_csv(t_mem)},{esc_csv(t_name)}," + f"{esc_csv(f_type)},{score:.4f}" + ) + + for f_base in unmatched_base: + b_ns, b_mem, b_name = get_feature_cols(f_base) + f_type = matcher.get_type_display_name(f_base) + csv_lines.append( + f"{esc_csv(b_ns)},{esc_csv(b_mem)},{esc_csv(b_name)}," + f",,,{esc_csv(f_type)},0.0000" + ) + + for f_target in unmatched_target: + t_ns, t_mem, t_name = get_feature_cols(f_target) + f_type = matcher.get_type_display_name(f_target) + csv_lines.append( + f",,,{esc_csv(t_ns)},{esc_csv(t_mem)}," + f"{esc_csv(t_name)},{esc_csv(f_type)},0.0000" + ) + + return MatchResult( + master_content="\n".join(csv_lines), + module_files={}, + ) + + def generate_directional_report(self) -> MatchResult: + """Generates a directional report.""" + all_modules = sorted(self.features_base.keys()) + master_lines = [] + title_suffix = "Directional" + master_lines.extend( + [ + f"# Feature Matching Report: {title_suffix}", + f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", + "", + ( + f"**Base:** {self.base_registry.language}" + f" ({self.base_registry.version})" + ), + ( + f"**Target:** {self.target_registry.language}" + f" ({self.target_registry.version})" + ), + ] + ) + + global_score_idx = len(master_lines) + master_lines.append("GLOBAL_SCORE_PLACEHOLDER") + master_lines.append("") + + header = "| Module | Features (Base) | Score | Status | Details |" 
+ divider = "|---|---|---|---|---|" + master_lines.extend(["## Module Summary", header, divider]) + + module_files = {} + module_rows = [] + total_solid_matches = 0 + + base_code = _get_language_code(self.base_registry.language) + target_code = _get_language_code(self.target_registry.language) + + for module in all_modules: + mod_base_list = self.features_base.get(module, []) + mod_target_list = self.features_target.get(module, []) + + results = matcher.process_module( + module, + mod_base_list, + mod_target_list, + self.alpha, + "directional", + base_code, + target_code, + ) + total_solid_matches += results["solid_matches_count"] + module_rows.append((results["score"], results["row_content"])) + if results.get("module_filename"): + module_files[results["module_filename"]] = results[ + "module_content" + ] + + module_rows.sort(key=lambda x: x[0], reverse=True) + master_lines.extend([row for _, row in module_rows]) + + total_base_features = len(self.base_registry.features) + total_target_features = len(self.target_registry.features) + + precision = stats.calculate_precision( + total_solid_matches, total_target_features + ) + recall = stats.calculate_recall( + total_solid_matches, total_base_features + ) + parity_score = stats.calculate_f1(precision, recall) + + global_stats = ( + "\n| Metric | Score |\n" + "|---|---|\n" + f"| **Precision** | {precision:.2%} |\n" + f"| **Recall** | {recall:.2%} |\n" + f"| **F1 Score** | {parity_score:.2%} |\n\n" + "> **Precision**: Of all features in the target, how many are " + "correct matches to the base? (High score = low number of extra " + "features in target)\n\n" + "> **Recall**: Of all features in the base, how many were found in " + "the target? (High score = low number of missing features in " + "target)\n\n" + "> **F1 Score**: A weighted average of Precision and Recall, " + "providing a single measure of how well the target feature set " + "matches the base." 
+ ) + + master_lines[global_score_idx] = global_stats + + return MatchResult( + master_content="\n".join(master_lines).strip(), + module_files=module_files, + ) + + def generate_symmetric_report(self) -> MatchResult: + """Generates a symmetric report.""" + all_modules = sorted( + set(self.features_base.keys()) | set(self.features_target.keys()) + ) + master_lines = [] + title_suffix = "Symmetric" + master_lines.extend( + [ + f"# Feature Matching Report: {title_suffix}", + f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", + "", + "| Registry | Language | Version |", + "| :--- | :--- | :--- |", + ( + f"| **Base** | {self.base_registry.language} |" + f" {self.base_registry.version} |" + ), + ( + f"| **Target** | {self.target_registry.language} |" + f" {self.target_registry.version} |" + ), + "", + ] + ) + + global_score_idx = len(master_lines) + master_lines.append("GLOBAL_SCORE_PLACEHOLDER") + master_lines.append("") + + header = "| ADK | Module | Features (Base) | Score | Status | Details |" + divider = "|---|---|---|---|---|---|" + + master_lines.extend(["## Module Summary", header, divider]) + + module_files = {} + module_rows = [] + total_solid_matches = 0 + + base_code = _get_language_code(self.base_registry.language) + target_code = _get_language_code(self.target_registry.language) + + for module in all_modules: + mod_base_list = self.features_base.get(module, []) + mod_target_list = self.features_target.get(module, []) + + results = matcher.process_module( + module, + mod_base_list, + mod_target_list, + self.alpha, + "symmetric", + base_code, + target_code, + ) + total_solid_matches += results["solid_matches_count"] + module_rows.append((results["score"], results["row_content"])) + if results.get("module_filename"): + module_files[results["module_filename"]] = results[ + "module_content" + ] + + module_rows.sort(key=lambda x: x[0], reverse=True) + master_lines.extend([row for _, row in module_rows]) + + total_base_features = 
len(self.base_registry.features) + total_target_features = len(self.target_registry.features) + + union_size = ( + total_base_features + total_target_features - total_solid_matches + ) + parity_score = ( + total_solid_matches / union_size if union_size > 0 else 1.0 + ) + global_stats = ( + f"**Jaccard Index:** {parity_score:.2%}\n\n" + "> The Jaccard Index measures the similarity between the " + "two feature sets. A score of 100% indicates that both languages " + "have identical features." + ) + + master_lines[global_score_idx] = global_stats + + return MatchResult( + master_content="\n".join(master_lines).strip(), + module_files=module_files, + ) + + +def main(): + parser = argparse.ArgumentParser( + description="Match ADK features between two languages." + ) + parser.add_argument( + "--base", + required=True, + help="Path to the base FeatureRegistry .txtpb file.", + ) + parser.add_argument( + "--target", + required=True, + help="Path to the target FeatureRegistry .txtpb file.", + ) + parser.add_argument( + "--output", + required=True, + help="Path to save the Markdown report.", + ) + parser.add_argument( + "--alpha", + type=float, + default=0.8, + help="Similarity threshold (0.0 to 1.0) defaults to 0.8.", + ) + parser.add_argument( + "--report-type", + choices=["symmetric", "directional", "raw"], + default="symmetric", + help="Type of gap report to generate (symmetric, directional, or raw).", + ) + adk_args.add_verbose_argument(parser) + args = parser.parse_args() + adk_args.configure_logging(args) + + try: + base_registry = _read_feature_registry(args.base) + target_registry = _read_feature_registry(args.target) + except Exception as e: + logging.error(f"Error reading feature registries: {e}") + sys.exit(1) + + result = match_registries( + base_registry, target_registry, args.alpha, args.report_type + ) + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + if args.report_type == "raw": + # Raw report is a single file, no 
modules directory needed + try: + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(result.master_content) + logging.info( + f"Successfully wrote raw match report to {output_path}" + ) + except Exception as e: + logging.error(f"Error writing raw report to {output_path}: {e}") + sys.exit(1) + return + + # Create module directory + if result.module_files: + modules_dir_name = f"{output_path.stem}_modules" + modules_dir = output_path.parent / modules_dir_name + modules_dir.mkdir(parents=True, exist_ok=True) + + # Write module files + for filename, content in result.module_files.items(): + # Replace placeholder for master report link + # The link is relative from module dir to master report + # So name is enough. + final_content = content.replace("{master_report}", output_path.name) + (modules_dir / filename).write_text(final_content) + + # Replace placeholder in Master Report + # We assume master report is in parent of modules_dir + # modules_dir relative to master report is just the dir name + master_report = result.master_content.replace( + "{modules_dir}", modules_dir_name + ) + else: + master_report = result.master_content.replace("{modules_dir}", ".") + + try: + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(master_report) + logging.info(f"Successfully wrote match report to {output_path}") + except Exception as e: + logging.error(f"Error writing report to {output_path}: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/google/adk/scope/utils/normalizer.py b/src/google/adk/scope/utils/normalizer.py index ab974a1..5adf96d 100644 --- a/src/google/adk/scope/utils/normalizer.py +++ b/src/google/adk/scope/utils/normalizer.py @@ -47,8 +47,8 @@ def _normalize_py_type(self, type_name: str) -> list[str]: if type_name.startswith("Optional[") and type_name.endswith("]"): inner = type_name[9:-1] result = self._normalize_py_type(inner) - if "null" not in result: - result.append("null") + 
if "NULL" not in result: + result.append("NULL") return result # Handle tuple[A, B] -> [A, B] @@ -60,6 +60,14 @@ def _normalize_py_type(self, type_name: str) -> list[str]: result.extend(self._normalize_py_type(p)) return self._unique(result) + # Handle str | list[str] + if "|" in type_name: + parts = type_name.split("|") + result = [] + for p in parts: + result.extend(self._normalize_py_type(p.strip())) + return self._unique(result) + # Handle other generics like List[int] -> LIST if "[" in type_name and type_name.endswith("]"): base = type_name.split("[", 1)[0] @@ -73,19 +81,23 @@ def _normalize_ts_type(self, t: str) -> List[str]: if not t: return ["OBJECT"] + if t in ("null", "undefined", "void"): + return ["NULL"] + # A | B if "|" in t: - parts = t.split("|") - res = [] - for p in parts: - res.extend(self._normalize_ts_type(p)) - return res + # Split by '|' only at the top level, respecting generics + parts = self._split_unions(t) + if len(parts) > 1: + res = [] + for p in parts: + res.extend(self._normalize_ts_type(p.strip())) + return self._unique(res) # Generics: Promise, Array - if "<" in t and t.endswith(">"): - base = t.split("<", 1)[0].strip() - # Find matching closing bracket or assumue last - inner = t[t.find("<") + 1 : -1].strip() + match = re.match(r"([a-zA-Z0-9_]+)<(.+)>$", t) + if match: + base, inner = match.groups() if base == "Promise": return self._normalize_ts_type(inner) @@ -108,6 +120,7 @@ def _normalize_ts_type(self, t: str) -> List[str]: return ["OBJECT"] t_lower = t.lower() + if t_lower in ("string", "formattedstring", "path"): return ["STRING"] if t_lower in ("number", "int", "float", "integer", "double"): @@ -128,15 +141,13 @@ def _normalize_ts_type(self, t: str) -> List[str]: return ["MAP"] if t_lower.startswith("set"): return ["SET"] - if t_lower == "void": - return [] return ["OBJECT"] def _simple_normalize(self, t: str) -> str: t = t.lower().strip() if t == "none": - return "null" + return "NULL" if t in ( "list", "array", @@ -195,3 
+206,24 @@ def _unique(self, lst: list[str]) -> list[str]: seen.add(x) out.append(x) return out + + def _split_unions(self, s: str) -> list[str]: + """Split string by |, ignoring nested generics.""" + parts = [] + balance = 0 + current = [] + for char in s: + if char == "<": + balance += 1 + current.append(char) + elif char == ">": + balance -= 1 + current.append(char) + elif char == "|" and balance == 0: + parts.append("".join(current).strip()) + current = [] + else: + current.append(char) + if current: + parts.append("".join(current).strip()) + return parts diff --git a/test/adk/scope/extractors/test_converter_py.py b/test/adk/scope/extractors/test_converter_py.py index 3fb096d..43a4916 100644 --- a/test/adk/scope/extractors/test_converter_py.py +++ b/test/adk/scope/extractors/test_converter_py.py @@ -461,10 +461,10 @@ def node_child(name): [feature_pb2.ParamType.OBJECT], ) - # b: Optional[int] -> [int, null] -> [INT, null] -> INT (null skipped) - self.assertEqual( + # b: Optional[int] -> [int, null] -> [INT, NULL] + self.assertCountEqual( result.parameters[1].normalized_types, - [feature_pb2.ParamType.NUMBER], + [feature_pb2.ParamType.NUMBER, feature_pb2.ParamType.NULL], ) def test_param_empty_name(self): diff --git a/test/adk/scope/extractors/test_extractor_py.py b/test/adk/scope/extractors/test_extractor_py.py index bd28bf5..d16ec3c 100644 --- a/test/adk/scope/extractors/test_extractor_py.py +++ b/test/adk/scope/extractors/test_extractor_py.py @@ -3,8 +3,10 @@ from pathlib import Path from unittest.mock import MagicMock, patch -from google.adk.scope.extractors.extractor_py import (extract_features, - find_files) +from google.adk.scope.extractors.extractor_py import ( + extract_features, + find_files, +) from google.adk.scope.features_pb2 import Feature # Mock tree_sitter modules BEFORE importing extractor diff --git a/test/adk/scope/extractors/test_extractor_ts.py b/test/adk/scope/extractors/test_extractor_ts.py index f68c49a..c52bd19 100644 --- 
a/test/adk/scope/extractors/test_extractor_ts.py +++ b/test/adk/scope/extractors/test_extractor_ts.py @@ -5,6 +5,7 @@ from unittest.mock import Mock, patch from google.adk.scope import features_pb2 + # Import the module under test # We need to make sure the src path is in PYTHONPATH which is handled # by test runner usually diff --git a/test/adk/scope/matcher/test_matcher.py b/test/adk/scope/matcher/test_matcher.py index d0aae2f..bfa5cb6 100644 --- a/test/adk/scope/matcher/test_matcher.py +++ b/test/adk/scope/matcher/test_matcher.py @@ -1,39 +1,10 @@ -import os -import tempfile import unittest -from unittest.mock import patch + from google.adk.scope import features_pb2 from google.adk.scope.matcher import matcher class TestMatcher(unittest.TestCase): - def test_read_feature_registry(self): - content = """ - language: "PYTHON" - version: "1.0.0" - features { - original_name: "test_feature" - normalized_name: "test_feature" - type: FUNCTION - } - """ - with tempfile.NamedTemporaryFile( - mode="w", suffix=".txtpb", delete=False - ) as f: - f.write(content) - temp_path = f.name - - try: - registry = matcher.read_feature_registry(temp_path) - self.assertEqual(registry.language, "PYTHON") - self.assertEqual(registry.version, "1.0.0") - self.assertEqual(len(registry.features), 1) - self.assertEqual(registry.features[0].original_name, "test_feature") - self.assertEqual( - registry.features[0].type, features_pb2.Feature.Type.FUNCTION - ) - finally: - os.remove(temp_path) def test_match_features(self): f1 = features_pb2.Feature( @@ -80,211 +51,6 @@ def test_match_features(self): self.assertEqual(len(target_features), 1) self.assertEqual(target_features[0].normalized_name, "entirely") - def test_match_registries(self): - # f1 & f2 are a solid match (score ~ 1.0) - f1 = features_pb2.Feature( - original_name="fSameBase", - normalized_name="f_same", - member_of="BaseClass", - namespace="google.adk.events", - normalized_member_of="c_same", - normalized_namespace="n_same", - 
type=features_pb2.Feature.Type.INSTANCE_METHOD, - ) - f2 = features_pb2.Feature( - original_name="fSameTarget", - normalized_name="f_same", - member_of="TargetClass", - namespace="adk.events", - normalized_member_of="c_same", - normalized_namespace="n_same", - type=features_pb2.Feature.Type.INSTANCE_METHOD, - ) - - # f_near_base & f_near_target are a near miss - # (different names, same structural namespace/class) - # Using different return types and different enough names to - # drop the score below 0.8 - f_near_base = features_pb2.Feature( - original_name="base_name", - normalized_name="base_name", - member_of="base_member", - namespace="google.adk.events", - normalized_member_of="base_member", - normalized_namespace="n_same", - original_return_types=["string"], - type=features_pb2.Feature.Type.INSTANCE_METHOD, - ) - f_near_target = features_pb2.Feature( - original_name="target_name", - normalized_name="targ_name", - member_of="target_member", - namespace="adk.events", - normalized_member_of="target_member", - normalized_namespace="n_same", - original_return_types=["int"], - type=features_pb2.Feature.Type.INSTANCE_METHOD, - ) - - # f3 is a complete gap (base-exclusive) - f3 = features_pb2.Feature( - original_name="totally_diff", - normalized_name="totally", - member_of="null", - namespace="google.adk.events", - normalized_member_of="different", - normalized_namespace="stuff", - type=features_pb2.Feature.Type.INSTANCE_METHOD, - ) - - base_registry = features_pb2.FeatureRegistry( - language="Python", version="1.0.0" - ) - base_registry.features.extend([f1, f_near_base, f3]) - - target_registry = features_pb2.FeatureRegistry( - language="TypeScript", version="2.0.0" - ) - target_registry.features.extend([f2, f_near_target]) - - # Test Symmetric Report - result_sym = matcher.match_registries( - base_registry, target_registry, 0.9, report_type="symmetric" - ) - report_sym = result_sym.master_content - - # 1. 
Verify Master Report Structure - self.assertIn("# Feature Matching Report: Symmetric", report_sym) - self.assertIn("**Jaccard Index:** 25.00%", report_sym) - self.assertIn("## Module Summary", report_sym) - - # Check for module entry in master summary - self.assertIn("| `n_same` |", report_sym) - self.assertIn("[View Details]({modules_dir}/n_same.md)", report_sym) - - # 2. Verify Module Content - self.assertIn("n_same.md", result_sym.module_files) - module_content = result_sym.module_files["n_same.md"] - - self.assertIn("# Module: `n_same`", module_content) - self.assertIn("**Features:** 3", module_content) - - # Solid Matches - self.assertIn("### ✅ Solid Features", module_content) - self.assertIn( - "| Type | Base Feature | Target Feature | Similarity Score |", - module_content, - ) - self.assertIn( - "| method | `BaseClass.fSameBase` | `TargetClass.fSameTarget` |", - module_content, - ) - - # Potential Matches (formerly Near Misses) - self.assertIn("### ⚠️ Potential Matches", module_content) - self.assertIn( - "| Type | Base Feature | Closest Target Candidate | Similarity |", - module_content, - ) - self.assertIn( - "| method | `base_member.base_name` | " - "`target_member.target_name` |", - module_content, - ) - - # Unmatched / Gaps (in 'stuff' module) - self.assertIn("stuff.md", result_sym.module_files) - stuff_content = result_sym.module_files["stuff.md"] - self.assertIn("### ❌ Unmatched Features", stuff_content) - self.assertIn("| `totally_diff` | Target |", stuff_content) - self.assertIn("**Features:** 1", stuff_content) - - # Test Directional Report - result_dir = matcher.match_registries( - base_registry, target_registry, 0.9, report_type="directional" - ) - report_dir = result_dir.master_content - - self.assertIn("| **F1 Score** | 40.00% |", report_dir) - self.assertIn("n_same.md", result_dir.module_files) - - mod_dir_content = result_dir.module_files["n_same.md"] - - # Solid Matches - self.assertIn("### ✅ Matched Features", mod_dir_content) - 
self.assertIn( - "| Type | Base Feature | Target Feature | Similarity Score |", - mod_dir_content, - ) - self.assertIn( - "| method | `BaseClass.fSameBase` | `TargetClass.fSameTarget` |", - mod_dir_content, - ) - - # Potential Matches - self.assertIn("### ⚠️ Potential Matches", mod_dir_content) - self.assertIn( - "| Type | Base Feature | Closest Target Candidate | Similarity |", - mod_dir_content, - ) - self.assertIn( - "| method | `base_member.base_name` | " - "`target_member.target_name` |", - mod_dir_content, - ) - - # Unmatched / Gaps (in 'stuff' module) - self.assertIn("stuff.md", result_dir.module_files) - stuff_dir_content = result_dir.module_files["stuff.md"] - self.assertIn("### ❌ Missing in Target", stuff_dir_content) - self.assertIn("| `totally_diff` |", stuff_dir_content) - - def test_match_registries_raw(self): - f1 = features_pb2.Feature( - original_name="f_same", - normalized_name="f_same", - normalized_namespace="pkg", - member_of="MyClass", - normalized_member_of="myclass", - type=features_pb2.Feature.Type.FUNCTION, - ) - base = features_pb2.FeatureRegistry(language="Python", version="1") - base.features.append(f1) - target = features_pb2.FeatureRegistry(language="TS", version="2") - target.features.append(f1) - - result = matcher.match_registries(base, target, 0.9, report_type="raw") - csv_content = result.master_content - - expected_header = ( - "py_namespace,py_member_of,py_name,ts_namespace," - "ts_member_of,ts_name,type,score" - ) - self.assertIn(expected_header, csv_content) - - # Check for solid match line - # f1 has: ns=pkg, mem=MyClass, name=f_same - # Match should have same values for base and target - expected_line = "pkg,MyClass,f_same,pkg,MyClass,f_same,function,1.0000" - self.assertIn(expected_line, csv_content) - self.assertFalse(result.module_files) - - def test_group_features_by_module(self): - registry = features_pb2.FeatureRegistry() - f1 = registry.features.add() - f1.namespace = "module.one" - f2 = registry.features.add() - 
f2.namespace = "module.two" - f3 = registry.features.add() - f3.namespace = "module.one" - - result = matcher._group_features_by_module(registry) - - self.assertIn("module.one", result) - self.assertIn("module.two", result) - self.assertEqual(len(result["module.one"]), 2) - self.assertEqual(len(result["module.two"]), 1) - def test_fuzzy_match_namespaces(self): features_base = {"module.one": [], "module.two": []} features_target = { @@ -293,7 +59,7 @@ def test_fuzzy_match_namespaces(self): "module.three": [features_pb2.Feature(original_name="f5")], } - matcher._fuzzy_match_namespaces(features_base, features_target) + matcher.fuzzy_match_namespaces(features_base, features_target) self.assertIn("module.one", features_target) self.assertIn("module.two", features_target) @@ -302,126 +68,13 @@ def test_fuzzy_match_namespaces(self): self.assertEqual(len(features_target["module.one"]), 3) self.assertEqual(len(features_target["module.two"]), 0) - def test_process_module(self): - """Tests the end-to-end processing of a single module.""" - f_base = features_pb2.Feature( - original_name="f1_base", - normalized_name="f1_base", - normalized_namespace="n1", - type=features_pb2.Feature.Type.FUNCTION, - ) - f_target = features_pb2.Feature( - original_name="f1_target", - normalized_name="f1_target", - normalized_namespace="n1", - type=features_pb2.Feature.Type.FUNCTION, - ) - - with patch( - "google.adk.scope.matcher.matcher.match_features" - ) as mock_match: - # Let's assume one solid match and no potential matches - mock_match.side_effect = [ - [(f_base, f_target, 0.95)], # Solid matches - [], # Potential matches - ] - - result = matcher._process_module( - module="n1", - base_list=[f_base], - target_list=[f_target], - alpha=0.9, - report_type="symmetric", - base_lang_code="py", - target_lang_code="ts", - ) - - self.assertEqual(result["solid_matches_count"], 1) - self.assertEqual(result["score"], 1.0) - self.assertIn("| py, ts |", result["row_content"]) - self.assertIn("# Module: 
`n1`", result["module_content"]) - self.assertIn("### ✅ Solid Features", result["module_content"]) - - def test_generate_raw_report(self): - """Tests the raw CSV report generation.""" - f_base = features_pb2.Feature( - original_name="f1_base", - normalized_name="f1_base", - namespace="n1", - member_of="c1", - type=features_pb2.Feature.Type.FUNCTION, - ) - - base_registry = features_pb2.FeatureRegistry( - language="Python", version="1.0.0" - ) - target_registry = features_pb2.FeatureRegistry( - language="TypeScript", version="2.0.0" - ) - - with patch( - "google.adk.scope.matcher.matcher.match_features" - ) as mock_match: - mock_match.return_value = [] # No matches for simplicity - - result = matcher._generate_raw_report( - base_registry=base_registry, - target_registry=target_registry, - all_modules=["n1"], - features_base={"n1": [f_base]}, - features_target={"n1": []}, - alpha=0.9, - ) - - self.assertIn( - "py_namespace,py_member_of,py_name", result.master_content - ) - self.assertIn("n1,c1,f1_base", result.master_content) - - def test_generate_markdown_report(self): - """Tests the markdown report generation.""" - base_registry = features_pb2.FeatureRegistry( - language="Python", version="1.0.0" - ) - target_registry = features_pb2.FeatureRegistry( - language="TypeScript", version="2.0.0" - ) - - with patch( - "google.adk.scope.matcher.matcher._process_module" - ) as mock_process: - mock_process.return_value = { - "solid_matches_count": 1, - "score": 1.0, - "row_content": "| py, ts | `n1` | 1 | 100.00% | ✅ | n1.md |", - "module_filename": "n1.md", - "module_content": "# Module: `n1`", - } - - result = matcher._generate_markdown_report( - base_registry=base_registry, - target_registry=target_registry, - all_modules=["n1"], - features_base={"n1": []}, # Dummy data - features_target={"n1": []}, # Dummy data - alpha=0.9, - report_type="symmetric", - ) - - self.assertIn( - "# Feature Matching Report: Symmetric", result.master_content - ) - self.assertIn("## Module 
Summary", result.master_content) - self.assertIn("| `n1` |", result.master_content) - self.assertIn("n1.md", result.module_files) - def test_fuzzy_match_namespaces_empty_base(self): features_base = {} features_target = { "module.one": [features_pb2.Feature(original_name="f1")] } - matcher._fuzzy_match_namespaces(features_base, features_target) + matcher.fuzzy_match_namespaces(features_base, features_target) self.assertIn("module.one", features_target) self.assertEqual(len(features_target["module.one"]), 1) diff --git a/test/adk/scope/reporter/test_reporter.py b/test/adk/scope/reporter/test_reporter.py new file mode 100644 index 0000000..f3f0223 --- /dev/null +++ b/test/adk/scope/reporter/test_reporter.py @@ -0,0 +1,473 @@ +import os +import tempfile +import unittest +from unittest.mock import patch + +from google.protobuf import text_format + +from google.adk.scope import features_pb2 +from google.adk.scope.matcher import matcher +from google.adk.scope.reporter import reporter + + +class TestReporter(unittest.TestCase): + def test_read_feature_registry(self): + content = """ + language: "PYTHON" + version: "1.0.0" + features { + original_name: "test_feature" + normalized_name: "test_feature" + type: FUNCTION + } + """ + with tempfile.NamedTemporaryFile( + mode="w", suffix=".txtpb", delete=False + ) as f: + f.write(content) + temp_path = f.name + + try: + registry = reporter._read_feature_registry(temp_path) + self.assertEqual(registry.language, "PYTHON") + self.assertEqual(registry.version, "1.0.0") + self.assertEqual(len(registry.features), 1) + self.assertEqual(registry.features[0].original_name, "test_feature") + self.assertEqual( + registry.features[0].type, features_pb2.Feature.Type.FUNCTION + ) + finally: + os.remove(temp_path) + + def test_match_registries(self): + # f1 & f2 are a solid match (score ~ 1.0) + f1 = features_pb2.Feature( + original_name="fSameBase", + normalized_name="f_same", + member_of="BaseClass", + namespace="google.adk.events", + 
normalized_member_of="c_same", + normalized_namespace="n_same", + type=features_pb2.Feature.Type.INSTANCE_METHOD, + ) + f2 = features_pb2.Feature( + original_name="fSameTarget", + normalized_name="f_same", + member_of="TargetClass", + namespace="adk.events", + normalized_member_of="c_same", + normalized_namespace="n_same", + type=features_pb2.Feature.Type.INSTANCE_METHOD, + ) + + # f_near_base & f_near_target are a near miss + # (different names, same structural namespace/class) + # Using different return types and different enough names to + # drop the score below 0.8 + f_near_base = features_pb2.Feature( + original_name="base_name", + normalized_name="base_name", + member_of="base_member", + namespace="google.adk.events", + normalized_member_of="base_member", + normalized_namespace="n_same", + original_return_types=["string"], + type=features_pb2.Feature.Type.INSTANCE_METHOD, + ) + f_near_target = features_pb2.Feature( + original_name="target_name", + normalized_name="targ_name", + member_of="target_member", + namespace="adk.events", + normalized_member_of="target_member", + normalized_namespace="n_same", + original_return_types=["int"], + type=features_pb2.Feature.Type.INSTANCE_METHOD, + ) + + # f3 is a complete gap (base-exclusive) + f3 = features_pb2.Feature( + original_name="totally_diff", + normalized_name="totally", + member_of="null", + namespace="google.adk.events", + normalized_member_of="different", + normalized_namespace="stuff", + type=features_pb2.Feature.Type.INSTANCE_METHOD, + ) + + base_registry = features_pb2.FeatureRegistry( + language="Python", version="1.0.0" + ) + base_registry.features.extend([f1, f_near_base, f3]) + + target_registry = features_pb2.FeatureRegistry( + language="TypeScript", version="2.0.0" + ) + target_registry.features.extend([f2, f_near_target]) + + # Test Symmetric Report + result_sym = reporter.match_registries( + base_registry, target_registry, 0.9, report_type="symmetric" + ) + report_sym = result_sym.master_content + 
+ # 1. Verify Master Report Structure + self.assertIn("# Feature Matching Report: Symmetric", report_sym) + self.assertIn("**Jaccard Index:** 25.00%", report_sym) + self.assertIn("## Module Summary", report_sym) + + # Check for module entry in master summary + self.assertIn("| `n_same` |", report_sym) + self.assertIn("[View Details]({modules_dir}/n_same.md)", report_sym) + + # 2. Verify Module Content + self.assertIn("n_same.md", result_sym.module_files) + module_content = result_sym.module_files["n_same.md"] + + self.assertIn("# Module: `n_same`", module_content) + self.assertIn("**Features:** 3", module_content) + + # Solid Matches + self.assertIn("### ✅ Solid Features", module_content) + self.assertIn( + "| Type | Base Feature | Target Feature | Similarity Score |", + module_content, + ) + self.assertIn( + "| method | `BaseClass.fSameBase` | `TargetClass.fSameTarget` |", + module_content, + ) + + # Potential Matches (formerly Near Misses) + self.assertIn("### ⚠️ Potential Matches", module_content) + self.assertIn( + "| Type | Base Feature | Closest Target Candidate | Similarity |", + module_content, + ) + self.assertIn( + "| method | `base_member.base_name` | " + "`target_member.target_name` |", + module_content, + ) + + # Unmatched / Gaps (in 'stuff' module) + self.assertIn("stuff.md", result_sym.module_files) + stuff_content = result_sym.module_files["stuff.md"] + self.assertIn("### ❌ Unmatched Features", stuff_content) + self.assertIn("| `totally_diff` | Target |", stuff_content) + self.assertIn("**Features:** 1", stuff_content) + + # Test Directional Report + result_dir = reporter.match_registries( + base_registry, target_registry, 0.9, report_type="directional" + ) + report_dir = result_dir.master_content + + self.assertIn("| **F1 Score** | 40.00% |", report_dir) + self.assertIn("n_same.md", result_dir.module_files) + + mod_dir_content = result_dir.module_files["n_same.md"] + + # Solid Matches + self.assertIn("### ✅ Matched Features", mod_dir_content) + 
self.assertIn( + "| Type | Base Feature | Target Feature | Similarity Score |", + mod_dir_content, + ) + self.assertIn( + "| method | `BaseClass.fSameBase` | `TargetClass.fSameTarget` |", + mod_dir_content, + ) + + # Potential Matches + self.assertIn("### ⚠️ Potential Matches", mod_dir_content) + self.assertIn( + "| Type | Base Feature | Closest Target Candidate | Similarity |", + mod_dir_content, + ) + self.assertIn( + "| method | `base_member.base_name` | " + "`target_member.target_name` |", + mod_dir_content, + ) + + # Unmatched / Gaps (in 'stuff' module) + self.assertIn("stuff.md", result_dir.module_files) + stuff_dir_content = result_dir.module_files["stuff.md"] + self.assertIn("### ❌ Missing in Target", stuff_dir_content) + self.assertIn("| `totally_diff` |", stuff_dir_content) + + def test_match_registries_raw(self): + f1 = features_pb2.Feature( + original_name="f_same", + normalized_name="f_same", + normalized_namespace="pkg", + member_of="MyClass", + normalized_member_of="myclass", + type=features_pb2.Feature.Type.FUNCTION, + ) + base = features_pb2.FeatureRegistry(language="Python", version="1") + base.features.append(f1) + target = features_pb2.FeatureRegistry(language="TS", version="2") + target.features.append(f1) + + result = reporter.match_registries(base, target, 0.9, report_type="raw") + csv_content = result.master_content + + expected_header = ( + "python_namespace,python_member_of,python_name,ts_namespace," + "ts_member_of,ts_name,type,score" + ) + self.assertIn(expected_header, csv_content) + + # Check for solid match line + # f1 has: ns=pkg, mem=MyClass, name=f_same + # Match should have same values for base and target + expected_line = "pkg,MyClass,f_same,pkg,MyClass,f_same,function,1.0000" + self.assertIn(expected_line, csv_content) + self.assertFalse(result.module_files) + + def test_group_features_by_module(self): + registry = features_pb2.FeatureRegistry() + f1 = registry.features.add() + f1.namespace = "module.one" + f2 = 
registry.features.add()
+    f2.namespace = "module.two"
+    f3 = registry.features.add()
+    f3.namespace = "module.one"
+
+    result = reporter._group_features_by_module(registry)
+
+    self.assertIn("module.one", result)
+    self.assertIn("module.two", result)
+    self.assertEqual(len(result["module.one"]), 2)
+    self.assertEqual(len(result["module.two"]), 1)
+
+  def test_process_module(self):
+    """Tests the end-to-end processing of a single module."""
+    f_base = features_pb2.Feature(
+        original_name="f1_base",
+        normalized_name="f1_base",
+        normalized_namespace="n1",
+        type=features_pb2.Feature.Type.FUNCTION,
+    )
+    f_target = features_pb2.Feature(
+        original_name="f1_target",
+        normalized_name="f1_target",
+        normalized_namespace="n1",
+        type=features_pb2.Feature.Type.FUNCTION,
+    )
+
+    with patch(
+        "google.adk.scope.reporter.reporter.matcher.match_features"
+    ) as mock_match:
+      # Let's assume one solid match and no potential matches
+      mock_match.side_effect = [
+          [(f_base, f_target, 0.95)],  # Solid matches
+          [],  # Potential matches
+      ]
+
+      result = matcher.process_module(
+          module="n1",
+          base_list=[f_base],
+          target_list=[f_target],
+          alpha=0.9,
+          report_type="symmetric",
+          base_lang_code="py",
+          target_lang_code="ts",
+      )
+
+      self.assertEqual(result["solid_matches_count"], 1)
+      self.assertEqual(result["score"], 1.0)
+      self.assertIn("| py, ts |", result["row_content"])
+      self.assertIn("# Module: `n1`", result["module_content"])
+      self.assertIn("### ✅ Solid Features", result["module_content"])
+
+  def test_generate_raw_report(self):
+    """Tests the raw CSV report generation."""
+    f_base = features_pb2.Feature(
+        original_name="f1_base",
+        normalized_name="f1_base",
+        namespace="n1",
+        member_of="c1",
+        type=features_pb2.Feature.Type.FUNCTION,
+    )
+
+    base_registry = features_pb2.FeatureRegistry(
+        language="Python", version="1.0.0"
+    )
+    base_registry.features.extend([f_base])
+    target_registry = features_pb2.FeatureRegistry(
+        language="TypeScript", version="2.0.0"
+    )
+
+    with patch(
+        "google.adk.scope.reporter.reporter.matcher.match_features"
+    ) as mock_match:
+      mock_match.return_value = []  # No matches for simplicity
+
+      result = reporter.ReportGenerator(
+          base_registry, target_registry, 0.9
+      ).generate_raw_report()
+
+      self.assertIn(
+          "python_namespace,python_member_of,python_name",
+          result.master_content,
+      )
+      self.assertIn("n1,c1,f1_base", result.master_content)
+
+  def test_generate_symmetric_report(self):
+    """Tests the symmetric report generation."""
+    base_registry = features_pb2.FeatureRegistry(
+        language="Python", version="1.0.0"
+    )
+    f1 = base_registry.features.add()
+    f1.namespace = "n1"
+    target_registry = features_pb2.FeatureRegistry(
+        language="TypeScript", version="2.0.0"
+    )
+
+    with patch(
+        "google.adk.scope.reporter.reporter.matcher.process_module"
+    ) as mock_process:
+      mock_process.return_value = {
+          "solid_matches_count": 1,
+          "score": 1.0,
+          "row_content": "| py, ts | `n1` | 1 | 100.00% | ✅ | n1.md |",
+          "module_filename": "n1.md",
+          "module_content": "# Module: `n1`",
+      }
+
+      result = reporter.ReportGenerator(
+          base_registry, target_registry, 0.9
+      ).generate_symmetric_report()
+
+      self.assertIn(
+          "# Feature Matching Report: Symmetric", result.master_content
+      )
+      self.assertIn("**Jaccard Index:**", result.master_content)
+      self.assertIn("## Module Summary", result.master_content)
+      self.assertIn("| `n1` |", result.master_content)
+      self.assertIn("n1.md", result.module_files)
+
+  def test_generate_directional_report(self):
+    """Tests the directional report generation."""
+    base_registry = features_pb2.FeatureRegistry(
+        language="Python", version="1.0.0"
+    )
+    f1 = base_registry.features.add()
+    f1.namespace = "n1"
+    target_registry = features_pb2.FeatureRegistry(
+        language="TypeScript", version="2.0.0"
+    )
+
+    with patch(
+        "google.adk.scope.reporter.reporter.matcher.process_module"
+    ) as mock_process:
+      mock_process.return_value = {
+          "solid_matches_count": 1,
+          "score": 1.0,
+          "row_content": "| `n1` | 1 | 100.00% | ✅ | n1.md |",
+          "module_filename": "n1.md",
+          "module_content": "# Module: `n1`",
+      }
+
+      result = reporter.ReportGenerator(
+          base_registry, target_registry, 0.9
+      ).generate_directional_report()
+
+      self.assertIn(
+          "# Feature Matching Report: Directional", result.master_content
+      )
+      self.assertIn("| **F1 Score** |", result.master_content)
+      self.assertIn("## Module Summary", result.master_content)
+      self.assertIn("| `n1` |", result.master_content)
+      self.assertIn("n1.md", result.module_files)
+
+  def test_raw_integration(self):
+    """Tests the raw report generation end-to-end."""
+    python_features_str = """
+    language: "PYTHON"
+    version: "1.23.0"
+    features {
+      original_name: "load_artifact"
+      normalized_name: "load_artifact"
+      description: "description"
+      member_of: "InMemoryArtifactService"
+      normalized_member_of: "in_memory_artifact_service"
+      type: INSTANCE_METHOD
+      file_path: "adk/runners.py"
+      namespace: "runners"
+      normalized_namespace: "artifacts"
+      parameters {
+        original_name: "app_name"
+        normalized_name: "app_name"
+        original_types: "str"
+        normalized_types: STRING
+        description: "The app name."
+      }
+      parameters {
+        original_name: "session_id"
+        normalized_name: "session_id"
+        original_types: "Optional[str]"
+        normalized_types: STRING
+        normalized_types: NULL
+        description: "description"
+        is_optional: true
+      }
+      original_return_types: "Optional[types.Part]"
+      normalized_return_types: "OBJECT"
+      normalized_return_types: "NULL"
+      async: true
+    }
+    """
+
+    typescript_features_str = """
+    language: "TYPESCRIPT"
+    version: "0.3.0"
+    features {
+      original_name: "loadArtifact"
+      normalized_name: "load_artifact"
+      member_of: "InMemoryArtifactService"
+      normalized_member_of: "in_memory_artifact_service"
+      type: INSTANCE_METHOD
+      file_path: "in_memory_artifact_service.ts"
+      namespace: "artifacts"
+      normalized_namespace: "artifacts"
+      parameters {
+        original_name: "request"
+        normalized_name: "request"
+        original_types: "LoadArtifactRequest"
+        normalized_types: OBJECT
+      }
+      original_return_types: "Promise"
+      normalized_return_types: "OBJECT"
+      normalized_return_types: "NULL"
+      async: true
+    }
+    """
+
+    py_registry = text_format.Parse(
+        python_features_str, features_pb2.FeatureRegistry()
+    )
+    ts_registry = text_format.Parse(
+        typescript_features_str, features_pb2.FeatureRegistry()
+    )
+
+    result = reporter.ReportGenerator(
+        py_registry, ts_registry, 0.8
+    ).generate_raw_report()
+
+    self.assertIn(
+        "python_namespace,python_member_of,python_name,ts_namespace,ts_member_of,ts_name,type,score",
+        result.master_content,
+    )
+
+    print(result.master_content)
+    self.assertEqual(len(result.master_content.splitlines()), 2)
+    # A known match
+
+
+if __name__ == "__main__":
+  unittest.main()
diff --git a/test/adk/scope/utils/test_normalizer.py b/test/adk/scope/utils/test_normalizer.py
index e6538c6..284bf54 100644
--- a/test/adk/scope/utils/test_normalizer.py
+++ b/test/adk/scope/utils/test_normalizer.py
@@ -43,11 +43,11 @@ def test_python_normalization(self):
     self.assertEqual(self.normalizer.normalize("list", "python"), ["LIST"])
     self.assertEqual(self.normalizer.normalize("dict", "python"), ["MAP"])
     self.assertEqual(self.normalizer.normalize("set", "python"), ["SET"])
-    self.assertEqual(self.normalizer.normalize("None", "python"), ["null"])
+    self.assertEqual(self.normalizer.normalize("None", "python"), ["NULL"])
     self.assertEqual(self.normalizer.normalize("any", "python"), ["OBJECT"])
     self.assertEqual(
         self.normalizer.normalize("Optional[str]", "python"),
-        ["STRING", "null"],
+        ["STRING", "NULL"],
     )
     self.assertEqual(
         self.normalizer.normalize("Union[str, int]", "python"),
@@ -60,6 +60,14 @@ def test_python_normalization(self):
         self.normalizer.normalize("Tuple[str, int]", "python"),
         ["STRING", "NUMBER"],
     )
+    self.assertEqual(
+        self.normalizer.normalize("str | list[str]", "python"),
+        ["STRING", "LIST"],
+    )
+    self.assertEqual(
+        self.normalizer.normalize("RunConfig | None", "python"),
+        ["OBJECT", "NULL"],
+    )
 
   def test_typescript_normalization(self):
     self.assertEqual(
@@ -84,7 +92,9 @@ def test_typescript_normalization(self):
     self.assertEqual(
         self.normalizer.normalize("Set", "typescript"), ["SET"]
     )
-    self.assertEqual(self.normalizer.normalize("void", "typescript"), [])
+    self.assertEqual(
+        self.normalizer.normalize("void", "typescript"), ["NULL"]
+    )
     self.assertEqual(
         self.normalizer.normalize("any", "typescript"), ["OBJECT"]
     )
@@ -97,6 +107,14 @@
         self.normalizer.normalize("string | number", "typescript"),
         ["STRING", "NUMBER"],
     )
+    self.assertEqual(
+        self.normalizer.normalize("string | null", "typescript"),
+        ["STRING", "NULL"],
+    )
+    self.assertEqual(
+        self.normalizer.normalize("string | undefined", "typescript"),
+        ["STRING", "NULL"],
+    )
 
   def test_edge_cases(self):
     self.assertEqual(self.normalizer.normalize("", "python"), ["OBJECT"])
diff --git a/test/adk/scope/utils/test_similarity.py b/test/adk/scope/utils/test_similarity.py
index 4cb0473..52aa86a 100644
--- a/test/adk/scope/utils/test_similarity.py
+++ b/test/adk/scope/utils/test_similarity.py
@@ -135,22 +135,25 @@ def test_run_async_integration(self):
         "normalized_namespace": "runner",
         "parameters": [
             features_pb.Param(
-                normalized_name="user_id", normalized_types=["STRING"]
+                normalized_name="user_id",
+                normalized_types=[features_pb.ParamType.STRING],
             ),
             features_pb.Param(
-                normalized_name="session_id", normalized_types=["STRING"]
+                normalized_name="session_id",
+                normalized_types=[features_pb.ParamType.STRING],
             ),
             features_pb.Param(
-                normalized_name="new_message", normalized_types=["OBJECT"]
+                normalized_name="new_message",
+                normalized_types=[features_pb.ParamType.OBJECT],
             ),
             features_pb.Param(
                 normalized_name="state_delta",
-                normalized_types=["OBJECT"],
+                normalized_types=[features_pb.ParamType.OBJECT],
                 is_optional=True,
             ),
             features_pb.Param(
                 normalized_name="run_config",
-                normalized_types=["OBJECT"],
+                normalized_types=[features_pb.ParamType.OBJECT],
                 is_optional=True,
             ),
         ],
@@ -166,33 +169,35 @@ def test_run_async_integration(self):
         "normalized_namespace": "runners",
         "parameters": [
             features_pb.Param(
-                normalized_name="user_id", normalized_types=["STRING"]
+                normalized_name="user_id",
+                normalized_types=[features_pb.ParamType.STRING],
            ),
             features_pb.Param(
-                normalized_name="session_id", normalized_types=["STRING"]
+                normalized_name="session_id",
+                normalized_types=[features_pb.ParamType.STRING],
             ),
             features_pb.Param(
                 normalized_name="invocation_id",
-                normalized_types=["STRING"],
+                normalized_types=[features_pb.ParamType.STRING],
                 is_optional=True,
             ),
             features_pb.Param(
                 normalized_name="new_message",
-                normalized_types=["OBJECT"],
+                normalized_types=[features_pb.ParamType.OBJECT],
                 is_optional=True,
             ),
             features_pb.Param(
                 normalized_name="state_delta",
-                normalized_types=["MAP"],
+                normalized_types=[features_pb.ParamType.MAP],
                 is_optional=True,
             ),
             features_pb.Param(
                 normalized_name="run_config",
-                normalized_types=["OBJECT"],
+                normalized_types=[features_pb.ParamType.OBJECT],
                 is_optional=True,
             ),
         ],
-        "normalized_return_types": ["OBJECT", "null"],
+        "normalized_return_types": ["OBJECT", "NULL"],
         "async": True,
         "type": features_pb.Feature.Type.INSTANCE_METHOD,
     }