diff --git a/labs.yml b/labs.yml index 3dfe067bcf..4798cd9b66 100644 --- a/labs.yml +++ b/labs.yml @@ -16,7 +16,9 @@ commands: - name: report-file description: (Optional) Local filesystem `path` of the analysis report file to write - name: source-tech - description: (Optional) The technology/platform of the sources to analyze + description: (Optional) Name of the Source System Technology you want to analyze + - name: generate-json + description: (Optional) Flag to indicate if a json file should be produced alongside the Excel file - name: transpile description: Transpile SQL/ETL sources to Databricks-compatible code diff --git a/src/databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py b/src/databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py index f589029964..51c8684555 100644 --- a/src/databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py +++ b/src/databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py @@ -2,6 +2,7 @@ import tempfile from pathlib import Path from collections.abc import Callable +from typing import cast from databricks.labs.blueprint.entrypoint import get_logger from databricks.labs.blueprint.tui import Prompts @@ -56,7 +57,10 @@ def get_source_system(self, platform: str | None = None) -> str: class AnalyzerRunner: def __init__( - self, runnable: Callable[[Path, Path, str, bool], None], move_file: Callable[[Path, Path], None], is_debug: bool + self, + runnable: Callable[[Path, Path, str, bool, bool], None], + move_file: Callable[[Path, Path], None], + is_debug: bool, ): self._runnable = runnable self._move_file = move_file @@ -64,16 +68,20 @@ def __init__( @classmethod def create(cls, is_debug: bool = False) -> "AnalyzerRunner": - return cls(Analyzer.analyze, move_tmp_file, is_debug) + return cls( + cast(Callable[[Path, Path, str, bool, bool], None], Analyzer.analyze), + move_tmp_file, + is_debug, + ) - def run(self, source_dir: Path, results_dir: Path, platform: str) -> AnalyzerResult: + def run(self, source_dir: Path, results_dir: Path, platform: str, generate_json: bool = False) -> AnalyzerResult: logger.debug(f"Starting analyzer execution in ${source_dir} for ${platform}") if not check_path(source_dir) or not check_path(results_dir): raise ValueError(f"Invalid path(s) provided: source_dir={source_dir}, results_dir={results_dir}") tmp_dir = self._temp_xlsx_path(results_dir) - self._runnable(source_dir, tmp_dir, platform, self._is_debug) + self._runnable(source_dir, tmp_dir, platform, self._is_debug, generate_json) self._move_file(tmp_dir, Path(results_dir)) logger.info(f"Successfully Analyzed files in ${source_dir} for ${platform} and saved report to {results_dir}") return AnalyzerResult(Path(source_dir), Path(results_dir), platform) @@ -90,7 +98,11 @@ def __init__(self, prompts: AnalyzerPrompts, runner: AnalyzerRunner): self._runner = runner def run_analyzer( - self, source: str | None = None, results: str | None = None, platform: str | None = None + self, + source: str | None = None, + results: str | None = None, + platform: str | None = None, + generate_json: bool = False, ) -> AnalyzerResult: if not source: source_dir = self._prompts.get_source_directory() @@ -108,4 +120,4 @@ def run_analyzer( platform = self._prompts.get_source_system(platform) - return self._runner.run(source_dir, results_dir, platform) + return self._runner.run(source_dir, results_dir, platform, generate_json) diff --git a/src/databricks/labs/lakebridge/cli.py b/src/databricks/labs/lakebridge/cli.py index a2d198f47f..7487c92c19 100644 --- a/src/databricks/labs/lakebridge/cli.py +++ b/src/databricks/labs/lakebridge/cli.py @@ -817,11 +817,12 @@ def analyze( source_directory: str | None = None, report_file: str | None = None, source_tech: str | None = None, + generate_json: bool = False, ): """Run the Analyzer""" ctx = ApplicationContext(w) try: - result = ctx.analyzer.run_analyzer(source_directory, report_file, source_tech) + result = ctx.analyzer.run_analyzer(source_directory, report_file, source_tech, generate_json) ctx.add_user_agent_extra("analyzer_source_tech", result.source_system) finally: exception_cls, _, _ = sys.exc_info()