Skip to content

Commit 32a46b2

Browse files
committed
Add duplicate file analysis for Android
1 parent 7b88498 commit 32a46b2

File tree

7 files changed

+123
-15
lines changed

7 files changed

+123
-15
lines changed

src/launchpad/analyzers/android.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
from ..artifacts.android.zipped_aab import ZippedAAB
99
from ..artifacts.android.zipped_apk import ZippedAPK
1010
from ..artifacts.artifact import AndroidArtifact
11-
from ..models.android import AndroidAnalysisResults, AndroidAppInfo
11+
from ..insights.common import DuplicateFilesInsight
12+
from ..insights.insight import InsightsInput
13+
from ..models.android import AndroidAnalysisResults, AndroidAppInfo, AndroidInsightResults
1214
from ..models.common import FileAnalysis, FileInfo
1315
from ..models.treemap import FILE_TYPE_TO_TREEMAP_TYPE, TreemapType
1416
from ..utils.file_utils import calculate_file_hash
@@ -25,6 +27,15 @@
2527
class AndroidAnalyzer:
2628
"""Analyzer for Android apps (.apk, .aab files)."""
2729

30+
def __init__(
31+
self,
32+
skip_insights: bool = False,
33+
) -> None:
34+
"""Args:
35+
skip_insights: Skip insights generation for faster analysis
36+
"""
37+
self.skip_insights = skip_insights
38+
2839
def analyze(self, artifact: AndroidArtifact) -> AndroidAnalysisResults:
2940
manifest_dict = artifact.get_manifest().model_dump()
3041
start_time = time.time()
@@ -61,13 +72,26 @@ def analyze(self, artifact: AndroidArtifact) -> AndroidAnalysisResults:
6172

6273
treemap = treemap_builder.build_file_treemap(file_analysis)
6374

75+
insights: AndroidInsightResults | None = None
76+
if not self.skip_insights:
77+
logger.info("Generating insights from analysis results")
78+
insights_input = InsightsInput(
79+
app_info=app_info,
80+
file_analysis=file_analysis,
81+
treemap=treemap,
82+
)
83+
insights = AndroidInsightResults(
84+
duplicate_files=DuplicateFilesInsight().generate(insights_input),
85+
)
86+
6487
analysis_duration = time.time() - start_time
6588
return AndroidAnalysisResults(
6689
generated_at=datetime.now(timezone.utc),
6790
analysis_duration=analysis_duration,
6891
app_info=app_info,
6992
treemap=treemap,
7093
file_analysis=file_analysis,
94+
insights=insights,
7195
)
7296

7397
def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis:
@@ -114,7 +138,7 @@ def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis:
114138
size=merged_size,
115139
file_type=file_type,
116140
treemap_type=treemap_type,
117-
# Intentionally igoring hash of merged file
141+
# Intentionally ignoring hash of merged file
118142
hash_md5="",
119143
)
120144
path_to_file_info[relative_path] = merged_file_info
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .artifact_factory import ArtifactFactory
2+
3+
__all__ = ["ArtifactFactory"]

src/launchpad/cli.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,10 @@ def android(
217217
else:
218218
_print_android_table_output(results, quiet)
219219

220+
if not quiet:
221+
console.print(f"\n[bold green]✓[/bold green] Analysis completed in {duration:.2f}s")
222+
_print_android_summary(results)
223+
220224
except Exception as e:
221225
if verbose:
222226
console.print_exception()
@@ -377,6 +381,24 @@ def _print_file_analysis_table(file_analysis: FileAnalysis) -> None:
377381
console.print()
378382

379383

384+
def _print_android_summary(results: AndroidAnalysisResults) -> None:
    """Print a short recap of an Android analysis to the console.

    Always prints the app name, package name, total app size and file count.
    A "potential savings" line is appended only when insights are present,
    the duplicate-files insight is present, and its total savings are
    greater than zero.

    Args:
        results: The Android analysis results to summarize.
    """
    insights = results.insights
    file_analysis = results.file_analysis
    app_info = results.app_info

    console.print("\n[bold]Summary:[/bold]")
    console.print(f"• App name: [cyan]{app_info.name}[/cyan]")
    console.print(f"• Package name: [cyan]{app_info.package_name}[/cyan]")
    console.print(f"• Total app size: [cyan]{_format_bytes(file_analysis.total_size)}[/cyan]")
    console.print(f"• File count: [cyan]{file_analysis.file_count:,}[/cyan]")

    # Nothing more to report unless duplicate detection found real savings.
    duplicates = insights.duplicate_files if insights else None
    if not duplicates or not duplicates.total_savings > 0:
        return

    savings = _format_bytes(duplicates.total_savings)
    console.print(f"• Potential savings from duplicates: [yellow]{savings}[/yellow]")
400+
401+
380402
def _print_apple_summary(results: AppleAnalysisResults) -> None:
381403
"""Print a brief summary of the analysis."""
382404
file_analysis = results.file_analysis

src/launchpad/insights/common.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212

1313
class DuplicateFilesInsight(Insight[DuplicateFilesInsightResult]):
1414

15+
def __call__(self, input: InsightsInput) -> DuplicateFilesInsightResult:
    """Allow the insight instance to be invoked directly.

    Delegates to ``generate`` with the same input and returns its result
    unchanged.
    """
    result = self.generate(input)
    return result
17+
1518
def generate(self, input: InsightsInput) -> DuplicateFilesInsightResult:
1619
# Group files by hash
1720
files_by_hash: Dict[str, List[FileInfo]] = defaultdict(list)

src/launchpad/insights/insight.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,18 @@
1-
from abc import abstractmethod
21
from dataclasses import dataclass
3-
from typing import Protocol, TypeVar
2+
from typing import Protocol, Sequence, TypeVar
43

5-
from ..models.apple import AppleAppInfo, MachOBinaryAnalysis
6-
from ..models.common import FileAnalysis
7-
from ..models.treemap import TreemapResults
4+
from launchpad.models.common import BaseAppInfo, BaseBinaryAnalysis, FileAnalysis
5+
from launchpad.models.treemap import TreemapResults
86

97
T_co = TypeVar("T_co", covariant=True)
108

119

1210
@dataclass
1311
class InsightsInput:
14-
app_info: AppleAppInfo
12+
app_info: BaseAppInfo
1513
file_analysis: FileAnalysis
1614
treemap: TreemapResults | None
17-
binary_analysis: list[MachOBinaryAnalysis]
15+
binary_analysis: Sequence[BaseBinaryAnalysis] | None = None
1816

1917

2018
class Insight(Protocol[T_co]):
@@ -24,8 +22,7 @@ class Insight(Protocol[T_co]):
2422
All data needed for the insight must be collected during the main analysis phase.
2523
"""
2624

27-
@abstractmethod
28-
def generate(self, input: InsightsInput) -> T_co:
25+
def __call__(self, input: InsightsInput) -> T_co:
2926
"""Generate insights from analysis results.
3027
3128
Args:
@@ -34,4 +31,4 @@ def generate(self, input: InsightsInput) -> T_co:
3431
Returns:
3532
Typed insight results
3633
"""
37-
raise NotImplementedError("Not implemented")
34+
...

src/launchpad/models/android.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
from pydantic import ConfigDict, Field
1+
from pydantic import BaseModel, ConfigDict, Field
22

33
from .common import BaseAnalysisResults, BaseAppInfo
4+
from .insights import DuplicateFilesInsightResult
45

56

67
class AndroidAppInfo(BaseAppInfo):
@@ -9,8 +10,15 @@ class AndroidAppInfo(BaseAppInfo):
910
package_name: str = Field(..., description="Android package name")
1011

1112

12-
class AndroidAnalysisResults(BaseAnalysisResults):
13-
"""Complete Android analysis results."""
13+
class AndroidInsightResults(BaseModel):
    """Container for the insights generated by an Android analysis.

    The model is frozen, and the duplicate-files insight defaults to ``None``
    when it is not supplied.
    """

    model_config = ConfigDict(frozen=True)

    duplicate_files: DuplicateFilesInsightResult | None = Field(
        default=None,
        description="Duplicate files analysis",
    )
1417

18+
19+
class AndroidAnalysisResults(BaseAnalysisResults):
    """Complete Android analysis results."""

    model_config = ConfigDict(frozen=True)
    app_info: AndroidAppInfo = Field(..., description="Android app information")
    # An explicit default keeps this field optional: without it pydantic treats
    # a `X | None` annotation as required, so building results without
    # insights would raise a validation error.
    insights: AndroidInsightResults | None = Field(
        default=None,
        description="Generated insights from the analysis",
    )
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
"""Tests for Android analyzer with duplicate file detection."""
2+
3+
from pathlib import Path
4+
5+
import pytest
6+
7+
from launchpad.analyzers.android import AndroidAnalyzer
8+
from launchpad.artifacts import ArtifactFactory
9+
10+
11+
@pytest.fixture
def test_apk_path() -> Path:
    """Return the relative path of the sample APK fixture used by these tests."""
    return Path("tests") / "_fixtures" / "android" / "hn.apk"
14+
15+
16+
@pytest.fixture
def android_analyzer() -> AndroidAnalyzer:
    """Return an ``AndroidAnalyzer`` constructed with its default arguments."""
    analyzer = AndroidAnalyzer()
    return analyzer
19+
20+
21+
class TestAndroidAnalyzer:
    """Checks of ``AndroidAnalyzer`` run against the sample APK fixture."""

    def test_analyze_with_duplicate_detection(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None:
        """Test that Android analyzer includes duplicate file detection."""
        results = android_analyzer.analyze(ArtifactFactory.from_path(test_apk_path))

        # Basic app metadata and file listing.
        app_info = results.app_info
        assert app_info.name == "Hacker News"
        assert app_info.package_name == "com.emergetools.hackernews"
        assert results.file_analysis is not None
        assert len(results.file_analysis.files) > 0

        # Insights must be present and include the duplicate-files result.
        insights = results.insights
        assert insights is not None
        duplicate_insight = insights.duplicate_files
        assert duplicate_insight is not None

        for attribute in ("files", "total_savings", "duplicate_count"):
            assert hasattr(duplicate_insight, attribute)
        for value in (duplicate_insight.total_savings, duplicate_insight.duplicate_count):
            assert isinstance(value, int)

        # Exact figures expected for the sample APK.
        assert duplicate_insight.total_savings == 51709
        assert duplicate_insight.duplicate_count == 52

    def test_duplicate_files_have_hashes(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None:
        """Test that all files have MD5 hashes for duplicate detection."""
        results = android_analyzer.analyze(ArtifactFactory.from_path(test_apk_path))

        for file_info in results.file_analysis.files:
            digest = file_info.hash_md5
            assert digest is not None
            assert len(digest) > 0

0 commit comments

Comments
 (0)