Skip to content

Commit 3e67611

Browse files
committed
Add duplicate file analysis for Android
1 parent 6048ea6 commit 3e67611

File tree

4 files changed: +97 -14 lines changed

src/launchpad/analyzers/android.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,19 @@
11
from __future__ import annotations
22

33
from datetime import datetime, timezone
4-
from typing import Any
54

65
from ..artifacts.android.aab import AAB
76
from ..artifacts.android.apk import APK
87
from ..artifacts.android.zipped_aab import ZippedAAB
98
from ..artifacts.android.zipped_apk import ZippedAPK
109
from ..artifacts.artifact import AndroidArtifact
11-
from ..models.android import AndroidAnalysisResults, AndroidAppInfo
10+
from ..insights.common import DuplicateFilesInsight
11+
from ..insights.insight import InsightsInput
12+
from ..models.android import (
13+
AndroidAnalysisResults,
14+
AndroidAppInfo,
15+
AndroidInsightResults,
16+
)
1217
from ..models.common import FileAnalysis, FileInfo
1318
from ..models.treemap import FILE_TYPE_TO_TREEMAP_TYPE, TreemapType
1419
from ..utils.file_utils import calculate_file_hash
@@ -25,8 +30,14 @@
2530
class AndroidAnalyzer:
2631
"""Analyzer for Android apps (.apk, .aab files)."""
2732

28-
def __init__(self, **_: Any) -> None:
29-
pass
33+
def __init__(
34+
self,
35+
skip_insights: bool = False,
36+
) -> None:
37+
"""Args:
38+
skip_insights: Skip insights generation for faster analysis
39+
"""
40+
self.skip_insights = skip_insights
3041

3142
def analyze(self, artifact: AndroidArtifact) -> AndroidAnalysisResults:
3243
manifest_dict = artifact.get_manifest().model_dump()
@@ -63,11 +74,25 @@ def analyze(self, artifact: AndroidArtifact) -> AndroidAnalysisResults:
6374

6475
treemap = treemap_builder.build_file_treemap(file_analysis)
6576

77+
insights: AndroidInsightResults | None = None
78+
if not self.skip_insights:
79+
logger.info("Generating insights from analysis results")
80+
insights_input = InsightsInput(
81+
app_info=app_info,
82+
file_analysis=file_analysis,
83+
treemap=treemap,
84+
binary_analysis=[],
85+
)
86+
insights = AndroidInsightResults(
87+
duplicate_files=DuplicateFilesInsight().generate(insights_input),
88+
)
89+
6690
return AndroidAnalysisResults(
6791
generated_at=datetime.now(timezone.utc),
6892
app_info=app_info,
6993
treemap=treemap,
7094
file_analysis=file_analysis,
95+
insights=insights,
7196
analysis_duration=None,
7297
)
7398

@@ -115,7 +140,7 @@ def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis:
115140
size=merged_size,
116141
file_type=file_type,
117142
treemap_type=treemap_type,
118-
# Intentionally igoring hash of merged file
143+
# Intentionally ignoring hash of merged file
119144
hash_md5="",
120145
)
121146
path_to_file_info[relative_path] = merged_file_info

src/launchpad/insights/insight.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,19 @@
11
from abc import abstractmethod
22
from dataclasses import dataclass
3-
from typing import Protocol, TypeVar
3+
from typing import Protocol, Sequence, TypeVar
44

5-
from ..models.apple import AppleAppInfo, MachOBinaryAnalysis
6-
from ..models.common import FileAnalysis
7-
from ..models.treemap import TreemapResults
5+
from launchpad.models.common import BaseAppInfo, BaseBinaryAnalysis, FileAnalysis
6+
from launchpad.models.treemap import TreemapResults
87

98
T_co = TypeVar("T_co", covariant=True)
109

1110

1211
@dataclass
1312
class InsightsInput:
14-
app_info: AppleAppInfo
13+
app_info: BaseAppInfo
1514
file_analysis: FileAnalysis
1615
treemap: TreemapResults | None
17-
binary_analysis: list[MachOBinaryAnalysis]
16+
binary_analysis: Sequence[BaseBinaryAnalysis]
1817

1918

2019
class Insight(Protocol[T_co]):

src/launchpad/models/android.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,23 @@
1-
from pydantic import ConfigDict, Field
1+
from pydantic import BaseModel, ConfigDict, Field
22

33
from .common import BaseAnalysisResults, BaseAppInfo
4+
from .insights import DuplicateFilesInsightResult
45

56

67
class AndroidAppInfo(BaseAppInfo):
78
model_config = ConfigDict(frozen=True)
89
package_name: str = Field(..., description="Android package name")
910

1011

11-
class AndroidAnalysisResults(BaseAnalysisResults):
12-
"""Complete Android analysis results."""
12+
class AndroidInsightResults(BaseModel):
13+
model_config = ConfigDict(frozen=True)
14+
15+
duplicate_files: DuplicateFilesInsightResult | None = Field(None, description="Duplicate files analysis")
1316

17+
18+
class AndroidAnalysisResults(BaseAnalysisResults):
1419
model_config = ConfigDict(frozen=True)
1520
app_info: AndroidAppInfo = Field(..., description="Android app information")
21+
insights: AndroidInsightResults | None = Field(
22+
description="Generated insights from the analysis",
23+
)
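[New file: path header missing from this capture; per its module docstring, a test module for the Android analyzer]

Lines changed: 51 additions & 0 deletions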
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
"""Tests for Android analyzer with duplicate file detection."""
2+
3+
from pathlib import Path
4+
5+
import pytest
6+
7+
from launchpad.analyzers.android import AndroidAnalyzer
8+
from launchpad.artifacts.artifact_factory import ArtifactFactory
9+
10+
11+
@pytest.fixture
12+
def test_apk_path() -> Path:
13+
return Path("tests/_fixtures/android/hn.apk")
14+
15+
16+
@pytest.fixture
17+
def android_analyzer() -> AndroidAnalyzer:
18+
return AndroidAnalyzer()
19+
20+
21+
class TestAndroidAnalyzer:
22+
def test_analyze_with_duplicate_detection(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None:
23+
"""Test that Android analyzer includes duplicate file detection."""
24+
artifact = ArtifactFactory.from_path(test_apk_path)
25+
results = android_analyzer.analyze(artifact)
26+
27+
assert results.app_info.name == "Hacker News"
28+
assert results.app_info.package_name == "com.emergetools.hackernews"
29+
assert results.file_analysis is not None
30+
assert len(results.file_analysis.files) > 0
31+
32+
assert results.insights is not None
33+
assert results.insights.duplicate_files is not None
34+
35+
duplicate_insight = results.insights.duplicate_files
36+
assert hasattr(duplicate_insight, "files")
37+
assert hasattr(duplicate_insight, "total_savings")
38+
assert hasattr(duplicate_insight, "duplicate_count")
39+
assert isinstance(duplicate_insight.total_savings, int)
40+
assert isinstance(duplicate_insight.duplicate_count, int)
41+
assert duplicate_insight.total_savings == 51709
42+
assert duplicate_insight.duplicate_count == 52
43+
44+
def test_duplicate_files_have_hashes(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None:
45+
"""Test that all files have MD5 hashes for duplicate detection."""
46+
artifact = ArtifactFactory.from_path(test_apk_path)
47+
results = android_analyzer.analyze(artifact)
48+
49+
for file_info in results.file_analysis.files:
50+
assert file_info.hash_md5 is not None
51+
assert len(file_info.hash_md5) > 0

0 commit comments

Comments
 (0)