Skip to content

Commit abbce43

Browse files
committed
Add duplicate file analysis for Android
1 parent b9445b8 commit abbce43

File tree

4 files changed

+97
-14
lines changed

4 files changed

+97
-14
lines changed

src/launchpad/analyzers/android.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,19 @@
22

33
import time
44
from datetime import datetime, timezone
5-
from typing import Any
65

76
from ..artifacts.android.aab import AAB
87
from ..artifacts.android.apk import APK
98
from ..artifacts.android.zipped_aab import ZippedAAB
109
from ..artifacts.android.zipped_apk import ZippedAPK
1110
from ..artifacts.artifact import AndroidArtifact
12-
from ..models.android import AndroidAnalysisResults, AndroidAppInfo
11+
from ..insights.common import DuplicateFilesInsight
12+
from ..insights.insight import InsightsInput
13+
from ..models.android import (
14+
AndroidAnalysisResults,
15+
AndroidAppInfo,
16+
AndroidInsightResults,
17+
)
1318
from ..models.common import FileAnalysis, FileInfo
1419
from ..models.treemap import FILE_TYPE_TO_TREEMAP_TYPE, TreemapType
1520
from ..utils.file_utils import calculate_file_hash
@@ -26,8 +31,14 @@
2631
class AndroidAnalyzer:
2732
"""Analyzer for Android apps (.apk, .aab files)."""
2833

29-
def __init__(self, **_: Any) -> None:
30-
pass
34+
def __init__(self, skip_insights: bool = False) -> None:
    """Create an Android analyzer.

    Args:
        skip_insights: Skip insights generation for faster analysis.
    """
    # Kept on the instance so later analysis steps can consult the flag.
    self.skip_insights = skip_insights
3142

3243
def analyze(self, artifact: AndroidArtifact) -> AndroidAnalysisResults:
3344
manifest_dict = artifact.get_manifest().model_dump()
@@ -65,13 +76,27 @@ def analyze(self, artifact: AndroidArtifact) -> AndroidAnalysisResults:
6576

6677
treemap = treemap_builder.build_file_treemap(file_analysis)
6778

79+
insights: AndroidInsightResults | None = None
80+
if not self.skip_insights:
81+
logger.info("Generating insights from analysis results")
82+
insights_input = InsightsInput(
83+
app_info=app_info,
84+
file_analysis=file_analysis,
85+
treemap=treemap,
86+
binary_analysis=[],
87+
)
88+
insights = AndroidInsightResults(
89+
duplicate_files=DuplicateFilesInsight().generate(insights_input),
90+
)
91+
6892
analysis_duration = time.time() - start_time
6993
return AndroidAnalysisResults(
7094
generated_at=datetime.now(timezone.utc),
7195
analysis_duration=analysis_duration,
7296
app_info=app_info,
7397
treemap=treemap,
7498
file_analysis=file_analysis,
99+
insights=insights,
75100
)
76101

77102
def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis:
@@ -118,7 +143,7 @@ def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis:
118143
size=merged_size,
119144
file_type=file_type,
120145
treemap_type=treemap_type,
121-
# Intentionally igoring hash of merged file
146+
# Intentionally ignoring hash of merged file
122147
hash_md5="",
123148
)
124149
path_to_file_info[relative_path] = merged_file_info

src/launchpad/insights/insight.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,19 @@
11
from abc import abstractmethod
22
from dataclasses import dataclass
3-
from typing import Protocol, TypeVar
3+
from typing import Protocol, Sequence, TypeVar
44

5-
from ..models.apple import AppleAppInfo, MachOBinaryAnalysis
6-
from ..models.common import FileAnalysis
7-
from ..models.treemap import TreemapResults
5+
from launchpad.models.common import BaseAppInfo, BaseBinaryAnalysis, FileAnalysis
6+
from launchpad.models.treemap import TreemapResults
87

98
T_co = TypeVar("T_co", covariant=True)
109

1110

1211
@dataclass
1312
class InsightsInput:
14-
app_info: AppleAppInfo
13+
app_info: BaseAppInfo
1514
file_analysis: FileAnalysis
1615
treemap: TreemapResults | None
17-
binary_analysis: list[MachOBinaryAnalysis]
16+
binary_analysis: Sequence[BaseBinaryAnalysis]
1817

1918

2019
class Insight(Protocol[T_co]):

src/launchpad/models/android.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
from pydantic import ConfigDict, Field
1+
from pydantic import BaseModel, ConfigDict, Field
22

33
from .common import BaseAnalysisResults, BaseAppInfo
4+
from .insights import DuplicateFilesInsightResult
45

56

67
class AndroidAppInfo(BaseAppInfo):
@@ -9,8 +10,15 @@ class AndroidAppInfo(BaseAppInfo):
910
package_name: str = Field(..., description="Android package name")
1011

1112

12-
class AndroidAnalysisResults(BaseAnalysisResults):
13-
"""Complete Android analysis results."""
13+
# Container for the insights generated for an Android analysis.
class AndroidInsightResults(BaseModel):
    # Instances are immutable once constructed.
    model_config = ConfigDict(frozen=True)

    # None when no duplicate-files result is available.
    duplicate_files: DuplicateFilesInsightResult | None = Field(
        default=None,
        description="Duplicate files analysis",
    )
1417

18+
19+
class AndroidAnalysisResults(BaseAnalysisResults):
    """Complete Android analysis results."""

    # Instances are immutable once constructed.
    model_config = ConfigDict(frozen=True)

    app_info: AndroidAppInfo = Field(..., description="Android app information")
    # Explicit default of None: under Pydantic v2 an ``X | None`` field without a
    # default is still *required*, so results built or loaded without insights
    # would fail validation. This also matches ``duplicate_files``, which
    # defaults to None.
    insights: AndroidInsightResults | None = Field(
        default=None,
        description="Generated insights from the analysis",
    )
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
"""Tests for Android analyzer with duplicate file detection."""
2+
3+
from pathlib import Path
4+
5+
import pytest
6+
7+
from launchpad.analyzers.android import AndroidAnalyzer
8+
from launchpad.artifacts.artifact_factory import ArtifactFactory
9+
10+
11+
@pytest.fixture
def test_apk_path() -> Path:
    """Provide the location of the ``hn.apk`` Android test fixture."""
    apk_path = Path("tests/_fixtures/android/hn.apk")
    return apk_path
14+
15+
16+
@pytest.fixture
def android_analyzer() -> AndroidAnalyzer:
    """Provide an ``AndroidAnalyzer`` built with its default arguments."""
    analyzer = AndroidAnalyzer()
    return analyzer
19+
20+
21+
class TestAndroidAnalyzer:
    """Checks of ``AndroidAnalyzer.analyze`` against the sample APK fixture."""

    def test_analyze_with_duplicate_detection(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None:
        """The analysis result carries app info, file analysis, and duplicate-file insights."""
        analysis = android_analyzer.analyze(ArtifactFactory.from_path(test_apk_path))

        # Basic app metadata and file listing.
        assert analysis.app_info.name == "Hacker News"
        assert analysis.app_info.package_name == "com.emergetools.hackernews"
        assert analysis.file_analysis is not None
        assert len(analysis.file_analysis.files) > 0

        # Insights are generated by default and include the duplicate-files result.
        assert analysis.insights is not None
        assert analysis.insights.duplicate_files is not None

        duplicates = analysis.insights.duplicate_files
        for attribute in ("files", "total_savings", "duplicate_count"):
            assert hasattr(duplicates, attribute)
        assert isinstance(duplicates.total_savings, int)
        assert isinstance(duplicates.duplicate_count, int)
        # Expected values for the hn.apk fixture.
        assert duplicates.total_savings == 51709
        assert duplicates.duplicate_count == 52

    def test_duplicate_files_have_hashes(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None:
        """Every analyzed file exposes a non-empty MD5 hash for duplicate detection."""
        analysis = android_analyzer.analyze(ArtifactFactory.from_path(test_apk_path))

        for entry in analysis.file_analysis.files:
            md5 = entry.hash_md5
            assert md5 is not None
            assert len(md5) > 0

0 commit comments

Comments
 (0)