From 45e3b4679e69fb880f0ee8229bedada2d680eef0 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Mon, 21 Jul 2025 16:33:23 -0400 Subject: [PATCH 1/4] Add nanobind leak detector --- tests/conftest.py | 75 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index a22fa2a1..be24b5b8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,8 @@ import os +import sys + +from io import StringIO +from typing import Generator import pytest @@ -7,3 +11,74 @@ def setup_test_environment(): """Set up test environment variables for all tests.""" os.environ.setdefault("LAUNCHPAD_ENV", "TEST") + + +class NanobindLeakDetector: + """Captures stderr and detects nanobind leak messages.""" + + def __init__(self): + self.original_stderr = sys.stderr + self.captured_stderr = StringIO() + self.leak_detected = False + self.leak_messages: list[str] = [] + + def start_capture(self): + """Start capturing stderr.""" + sys.stderr = self.captured_stderr + + def stop_capture_and_check(self): + """Stop capturing stderr and check for nanobind leaks.""" + # Restore original stderr + sys.stderr = self.original_stderr + + # Get captured content + captured_content = self.captured_stderr.getvalue() + + # Check for nanobind leak messages + lines = captured_content.split("\n") + leak_lines = [line for line in lines if "nanobind: leaked" in line.lower()] + + if leak_lines: + self.leak_detected = True + self.leak_messages.extend(leak_lines) + + # Write captured content to original stderr so it's still visible + if captured_content: + self.original_stderr.write(captured_content) + + # Reset for next capture + self.captured_stderr = StringIO() + + +@pytest.fixture(autouse=True) +def nanobind_leak_detector() -> Generator[NanobindLeakDetector, None, None]: + """Fixture that detects nanobind memory leaks during test execution.""" + detector = NanobindLeakDetector() + + # Start capturing before the test + detector.start_capture() + + try: + yield detector + finally: + # Stop capturing and check for leaks after the test + detector.stop_capture_and_check() + + # If leaks were detected, fail the test with a clear message + if detector.leak_detected: + leak_summary = "\n".join(f" • {msg}" for msg in detector.leak_messages) + pytest.fail( + f"❌ NANOBIND MEMORY LEAK DETECTED ❌\n\n" + f"This test caused nanobind memory leaks, which can lead to dangerous memory issues.\n" + f"Leaked objects indicate that Python objects backed by C++ objects weren't properly cleaned up.\n\n" + f"Leak messages detected:\n{leak_summary}\n\n" + f"💡 Common causes:\n" + f" • LIEF objects not being properly garbage collected\n" + f" • Circular references preventing cleanup\n" + f" • Missing explicit cleanup of binary objects\n\n" + f"🔧 Potential fixes:\n" + f" • Ensure LIEF binary objects are explicitly deleted when done\n" + f" • Use context managers or try/finally blocks for cleanup\n" + f" • Call gc.collect() after processing large binaries\n" + f" • Check for circular references in object graphs" + ) From a9205fc052f021f4f95a158b99c6df18f0a653c7 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Mon, 21 Jul 2025 16:35:48 -0400 Subject: [PATCH 2/4] add test --- tests/test_nanobind_leak_detector.py | 100 +++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 tests/test_nanobind_leak_detector.py diff --git a/tests/test_nanobind_leak_detector.py b/tests/test_nanobind_leak_detector.py new file mode 100644 index 00000000..f5f2a143 --- /dev/null +++ b/tests/test_nanobind_leak_detector.py @@ -0,0 +1,100 @@ +"""Test to verify nanobind leak detector is working by intentionally creating leaks.""" + +from __future__ import annotations + +import gc + +from pathlib import Path +from typing import Any + +import pytest + +try: + import lief +except ImportError: + pytest.skip("LIEF not available for testing", allow_module_level=True) + +# Global variable to hold leaked objects +_leaked_objects: list[Any] = [] + + +def test_intentional_nanobind_leak(nanobind_leak_detector: Any) -> None: + """Test that intentionally creates nanobind leaks to verify the detector works.""" + # Get a test binary to parse + test_artifacts_dir = Path(__file__).parent / "_fixtures" + test_binary = None + + # Look for any binary file in the test fixtures + for binary_path in test_artifacts_dir.rglob("*"): + if binary_path.is_file() and binary_path.suffix not in [".plist", ".txt", ".md"]: + try: + # Try to parse it with LIEF to see if it's a valid binary + parsed = lief.MachO.parse(str(binary_path)) # type: ignore + if parsed and parsed.size > 0: + test_binary = binary_path + break + except Exception: + continue + + if not test_binary: + pytest.skip("No suitable test binary found in fixtures") + + # Intentionally create leaked LIEF objects + # Parse the binary multiple times and store references globally + for _ in range(3): + fat_binary = lief.MachO.parse(str(test_binary)) # type: ignore + if fat_binary and fat_binary.size > 0: + binary = fat_binary.at(0) + + # Store references globally to prevent garbage collection + _leaked_objects.append(fat_binary) + _leaked_objects.append(binary) + + # Create more objects that won't be cleaned up + if binary.symbols: + for j, symbol in enumerate(binary.symbols): + _leaked_objects.append(symbol) + if j >= 10: # Don't leak too many + break + + # Don't clean up _leaked_objects - this should trigger the leak detector + print(f"Intentionally leaked {len(_leaked_objects)} nanobind objects") + + +def test_proper_cleanup_no_leak(nanobind_leak_detector: Any) -> None: + """Test that demonstrates proper cleanup doesn't trigger leak detector.""" + # Get a test binary to parse + test_artifacts_dir = Path(__file__).parent / "_fixtures" + test_binary = None + + # Look for any binary file in the test fixtures + for binary_path in test_artifacts_dir.rglob("*"): + if binary_path.is_file() and binary_path.suffix not in [".plist", ".txt", ".md"]: + try: + # Try to parse it with LIEF to see if it's a valid binary + parsed = lief.MachO.parse(str(binary_path)) # type: ignore + if parsed and parsed.size > 0: + test_binary = binary_path + break + except Exception: + continue + + if not test_binary: + pytest.skip("No suitable test binary found in fixtures") + + # Properly use LIEF objects without leaking + fat_binary = lief.MachO.parse(str(test_binary)) # type: ignore + if fat_binary and fat_binary.size > 0: + binary = fat_binary.at(0) + + # Use the objects but don't store global references + symbol_count = len(binary.symbols) + + print(f"Analyzed binary with {symbol_count} symbols") + + # Objects will be properly garbage collected when they go out of scope + del binary + del fat_binary + + # Force garbage collection to clean up + gc.collect() From 85a713ce3008cfb194b4b8ed7a94cc8ae06a9875 Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Mon, 21 Jul 2025 17:13:52 -0400 Subject: [PATCH 3/4] fix --- tests/conftest.py | 15 +--- tests/test_nanobind_leak_detector.py | 100 --------------------------- 2 files changed, 2 insertions(+), 113 deletions(-) delete mode 100644 tests/test_nanobind_leak_detector.py diff --git a/tests/conftest.py b/tests/conftest.py index be24b5b8..74f437df 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -61,24 +61,13 @@ def nanobind_leak_detector() -> Generator[NanobindLeakDetector, None, None]: try: yield detector finally: - # Stop capturing and check for leaks after the test detector.stop_capture_and_check() - # If leaks were detected, fail the test with a clear message if detector.leak_detected: leak_summary = "\n".join(f" • {msg}" for msg in detector.leak_messages) pytest.fail( f"❌ NANOBIND MEMORY LEAK DETECTED ❌\n\n" - f"This test caused nanobind memory leaks, which can lead to dangerous memory issues.\n" - f"Leaked objects indicate that Python objects backed by C++ objects weren't properly cleaned up.\n\n" + f"This change caused nanobind memory leaks, which can lead to dangerous memory issues.\n" + f"Please review your code to ensure that all LIEF objects are properly cleaned up.\n\n" f"Leak messages detected:\n{leak_summary}\n\n" - f"💡 Common causes:\n" - f" • LIEF objects not being properly garbage collected\n" - f" • Circular references preventing cleanup\n" - f" • Missing explicit cleanup of binary objects\n\n" - f"🔧 Potential fixes:\n" - f" • Ensure LIEF binary objects are explicitly deleted when done\n" - f" • Use context managers or try/finally blocks for cleanup\n" - f" • Call gc.collect() after processing large binaries\n" - f" • Check for circular references in object graphs" ) diff --git a/tests/test_nanobind_leak_detector.py b/tests/test_nanobind_leak_detector.py deleted file mode 100644 index f5f2a143..00000000 --- a/tests/test_nanobind_leak_detector.py +++ /dev/null @@ -1,100 +0,0 @@ -"""Test to verify nanobind leak detector is working by intentionally creating leaks.""" - -from __future__ import annotations - -import gc - -from pathlib import Path -from typing import Any - -import pytest - -try: - import lief -except ImportError: - pytest.skip("LIEF not available for testing", allow_module_level=True) - -# Global variable to hold leaked objects -_leaked_objects: list[Any] = [] - - -def test_intentional_nanobind_leak(nanobind_leak_detector: Any) -> None: - """Test that intentionally creates nanobind leaks to verify the detector works.""" - # Get a test binary to parse - test_artifacts_dir = Path(__file__).parent / "_fixtures" - test_binary = None - - # Look for any binary file in the test fixtures - for binary_path in test_artifacts_dir.rglob("*"): - if binary_path.is_file() and binary_path.suffix not in [".plist", ".txt", ".md"]: - try: - # Try to parse it with LIEF to see if it's a valid binary - parsed = lief.MachO.parse(str(binary_path)) # type: ignore - if parsed and parsed.size > 0: - test_binary = binary_path - break - except Exception: - continue - - if not test_binary: - pytest.skip("No suitable test binary found in fixtures") - - # Intentionally create leaked LIEF objects - # Parse the binary multiple times and store references globally - for _ in range(3): - fat_binary = lief.MachO.parse(str(test_binary)) # type: ignore - if fat_binary and fat_binary.size > 0: - binary = fat_binary.at(0) - - # Store references globally to prevent garbage collection - _leaked_objects.append(fat_binary) - _leaked_objects.append(binary) - - # Create more objects that won't be cleaned up - if binary.symbols: - for j, symbol in enumerate(binary.symbols): - _leaked_objects.append(symbol) - if j >= 10: # Don't leak too many - break - - # Don't clean up _leaked_objects - this should trigger the leak detector - print(f"Intentionally leaked {len(_leaked_objects)} nanobind objects") - - -def test_proper_cleanup_no_leak(nanobind_leak_detector: Any) -> None: - """Test that demonstrates proper cleanup doesn't trigger leak detector.""" - # Get a test binary to parse - test_artifacts_dir = Path(__file__).parent / "_fixtures" - test_binary = None - - # Look for any binary file in the test fixtures - for binary_path in test_artifacts_dir.rglob("*"): - if binary_path.is_file() and binary_path.suffix not in [".plist", ".txt", ".md"]: - try: - # Try to parse it with LIEF to see if it's a valid binary - parsed = lief.MachO.parse(str(binary_path)) # type: ignore - if parsed and parsed.size > 0: - test_binary = binary_path - break - except Exception: - continue - - if not test_binary: - pytest.skip("No suitable test binary found in fixtures") - - # Properly use LIEF objects without leaking - fat_binary = lief.MachO.parse(str(test_binary)) # type: ignore - if fat_binary and fat_binary.size > 0: - binary = fat_binary.at(0) - - # Use the objects but don't store global references - symbol_count = len(binary.symbols) - - print(f"Analyzed binary with {symbol_count} symbols") - - # Objects will be properly garbage collected when they go out of scope - del binary - del fat_binary - - # Force garbage collection to clean up - gc.collect() From ed53935f9d05b210ca4073cfe335c7e1d92f6d7c Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Mon, 21 Jul 2025 17:19:01 -0400 Subject: [PATCH 4/4] test --- tests/unit/test_large_audio_file_insight.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/unit/test_large_audio_file_insight.py b/tests/unit/test_large_audio_file_insight.py index e4609632..04ebce7c 100644 --- a/tests/unit/test_large_audio_file_insight.py +++ b/tests/unit/test_large_audio_file_insight.py @@ -20,6 +20,11 @@ def test_generate_with_large_audio_files(self): treemap_type=TreemapType.ASSETS, hash_md5="hash1", ) + import sys + + print("DEBUG: About to write to stderr", file=sys.stdout) + print("nanobind: leaked telkins", file=sys.stderr) + print("DEBUG: Just wrote to stderr", file=sys.stdout) large_audio_2 = FileInfo( full_path="assets/large_wav.wav", path="assets/large_wav.wav",