Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ commands:
- name: report-file
description: (Optional) Local filesystem `path` of the analysis report file to write
- name: source-tech
description: (Optional) The technology/platform of the sources to analyze
description: (Optional) Name of the Source System Technology you want to analyze
- name: generate-json
description: (Optional) Flag indicating whether a JSON file should be produced alongside the Excel file

- name: transpile
description: Transpile SQL/ETL sources to Databricks-compatible code
Expand Down
24 changes: 18 additions & 6 deletions src/databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import tempfile
from pathlib import Path
from collections.abc import Callable
from typing import cast

from databricks.labs.blueprint.entrypoint import get_logger
from databricks.labs.blueprint.tui import Prompts
Expand Down Expand Up @@ -56,24 +57,31 @@ def get_source_system(self, platform: str | None = None) -> str:

class AnalyzerRunner:
def __init__(
self, runnable: Callable[[Path, Path, str, bool], None], move_file: Callable[[Path, Path], None], is_debug: bool
self,
runnable: Callable[[Path, Path, str, bool, bool], None],
move_file: Callable[[Path, Path], None],
is_debug: bool,
):
self._runnable = runnable
self._move_file = move_file
self._is_debug = is_debug

@classmethod
def create(cls, is_debug: bool = False) -> "AnalyzerRunner":
    """Build a runner wired to ``Analyzer.analyze`` and ``move_tmp_file``.

    Args:
        is_debug: Debug flag stored on the runner and forwarded to the analyzer.

    Returns:
        A new ``AnalyzerRunner`` instance.
    """
    # The cast only informs the type checker of the five-argument call shape
    # used by ``run``; it has no runtime effect.
    analyze_fn = cast(Callable[[Path, Path, str, bool, bool], None], Analyzer.analyze)
    return cls(analyze_fn, move_tmp_file, is_debug)

def run(self, source_dir: Path, results_dir: Path, platform: str, generate_json: bool = False) -> AnalyzerResult:
    """Run the analyzer on ``source_dir`` and move its output to ``results_dir``.

    Args:
        source_dir: Path of the sources to analyze; validated with ``check_path``.
        results_dir: Final location of the analysis output; validated with
            ``check_path``.
        platform: Source technology identifier passed through to the analyzer.
        generate_json: Forwarded unchanged as the last argument of the
            analyzer callable.

    Returns:
        An ``AnalyzerResult`` built from ``source_dir``, ``results_dir`` and
        ``platform``.

    Raises:
        ValueError: If ``check_path`` rejects either path.
    """
    # Python f-strings interpolate with ``{name}``; the previous ``${name}``
    # form emitted a stray literal ``$`` in front of every value.
    logger.debug(f"Starting analyzer execution in {source_dir} for {platform}")

    if not check_path(source_dir) or not check_path(results_dir):
        raise ValueError(f"Invalid path(s) provided: source_dir={source_dir}, results_dir={results_dir}")

    # The analyzer writes to a temporary path first; the output is moved to
    # results_dir only after the analyzer call has returned.
    tmp_dir = self._temp_xlsx_path(results_dir)
    self._runnable(source_dir, tmp_dir, platform, self._is_debug, generate_json)
    self._move_file(tmp_dir, Path(results_dir))
    logger.info(f"Successfully Analyzed files in {source_dir} for {platform} and saved report to {results_dir}")
    return AnalyzerResult(Path(source_dir), Path(results_dir), platform)
Expand All @@ -90,7 +98,11 @@ def __init__(self, prompts: AnalyzerPrompts, runner: AnalyzerRunner):
self._runner = runner

def run_analyzer(
self, source: str | None = None, results: str | None = None, platform: str | None = None
self,
source: str | None = None,
results: str | None = None,
platform: str | None = None,
generate_json: bool = False,
) -> AnalyzerResult:
if not source:
source_dir = self._prompts.get_source_directory()
Expand All @@ -108,4 +120,4 @@ def run_analyzer(

platform = self._prompts.get_source_system(platform)

return self._runner.run(source_dir, results_dir, platform)
return self._runner.run(source_dir, results_dir, platform, generate_json)
3 changes: 2 additions & 1 deletion src/databricks/labs/lakebridge/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,11 +817,12 @@ def analyze(
source_directory: str | None = None,
report_file: str | None = None,
source_tech: str | None = None,
generate_json: bool = False,
):
"""Run the Analyzer"""
ctx = ApplicationContext(w)
try:
result = ctx.analyzer.run_analyzer(source_directory, report_file, source_tech)
result = ctx.analyzer.run_analyzer(source_directory, report_file, source_tech, generate_json)
ctx.add_user_agent_extra("analyzer_source_tech", result.source_system)
finally:
exception_cls, _, _ = sys.exc_info()
Expand Down
Loading