|
| 1 | +"""Tests for Android analyzer with duplicate file detection.""" |
| 2 | + |
| 3 | +from pathlib import Path |
| 4 | + |
| 5 | +import pytest |
| 6 | + |
| 7 | +from launchpad.analyzers.android import AndroidAnalyzer |
| 8 | +from launchpad.artifacts import ArtifactFactory |
| 9 | + |
| 10 | + |
@pytest.fixture
def test_apk_path() -> Path:
    """Location of the Hacker News sample APK bundled with the test suite."""
    return Path("tests") / "_fixtures" / "android" / "hn.apk"
| 14 | + |
| 15 | + |
@pytest.fixture
def android_analyzer() -> AndroidAnalyzer:
    """Fresh AndroidAnalyzer instance for each test."""
    analyzer = AndroidAnalyzer()
    return analyzer
| 19 | + |
| 20 | + |
class TestAndroidAnalyzer:
    """Integration tests for AndroidAnalyzer's duplicate-file detection on a real APK."""

    def test_analyze_with_duplicate_detection(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None:
        """Test that Android analyzer includes duplicate file detection."""
        artifact = ArtifactFactory.from_path(test_apk_path)
        results = android_analyzer.analyze(artifact)

        # Verify basic analysis results
        assert results.app_info.name == "Hacker News"
        assert results.app_info.package_name == "com.emergetools.hackernews"
        assert results.file_analysis is not None
        assert len(results.file_analysis.files) > 0

        # Verify insights are generated
        assert results.insights is not None
        assert results.insights.duplicate_files is not None

        # Verify duplicate files insight structure
        duplicate_insight = results.insights.duplicate_files
        assert hasattr(duplicate_insight, "files")
        assert hasattr(duplicate_insight, "total_savings")
        assert hasattr(duplicate_insight, "duplicate_count")
        assert isinstance(duplicate_insight.total_savings, int)
        assert isinstance(duplicate_insight.duplicate_count, int)
        # Savings and counts are sizes/cardinalities; guard against sign bugs.
        assert duplicate_insight.total_savings >= 0
        assert duplicate_insight.duplicate_count >= 0

    def test_duplicate_files_have_hashes(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None:
        """Test that all files have MD5 hashes for duplicate detection."""
        artifact = ArtifactFactory.from_path(test_apk_path)
        results = android_analyzer.analyze(artifact)

        # Guard before dereferencing, consistent with the first test.
        assert results.file_analysis is not None

        # Verify all files have hashes
        for file_info in results.file_analysis.files:
            assert file_info.hash_md5 is not None
            # An MD5 hex digest is exactly 32 characters; the previous
            # `len(...) > 0` check was weaker than the comment claimed.
            assert len(file_info.hash_md5) == 32

    def test_duplicate_detection_algorithm(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None:
        """Test that duplicate detection groups files by hash correctly."""
        artifact = ArtifactFactory.from_path(test_apk_path)
        results = android_analyzer.analyze(artifact)

        assert results.file_analysis is not None
        assert results.insights is not None

        # Group files by hash to verify the algorithm (idiomatic setdefault
        # instead of a membership check followed by append).
        files_by_hash: dict = {}
        for file_info in results.file_analysis.files:
            files_by_hash.setdefault(file_info.hash_md5, []).append(file_info)

        # Check that the duplicate detection found the same groups
        duplicate_insight = results.insights.duplicate_files
        expected_duplicate_files = []
        expected_total_savings = 0

        for file_list in files_by_hash.values():
            if len(file_list) > 1:
                # Calculate potential savings (all files except one)
                total_file_size = sum(f.size for f in file_list)
                savings = total_file_size - file_list[0].size

                if savings > 0:
                    # Add all files except the first one (which we'll keep)
                    expected_duplicate_files.extend(file_list[1:])
                    expected_total_savings += savings

        # Verify the insight results match our manual calculation
        assert len(duplicate_insight.files) == len(expected_duplicate_files)
        assert duplicate_insight.total_savings == expected_total_savings
0 commit comments