62 changes: 61 additions & 1 deletion README.md
@@ -6,14 +6,15 @@
[![Python 3.9–3.14](https://img.shields.io/pypi/pyversions/spprof.svg)](https://pypi.org/project/spprof/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

A high-performance sampling profiler for Python with [Speedscope](https://www.speedscope.app) and FlameGraph output.
A high-performance sampling profiler for Python with [Speedscope](https://www.speedscope.app) and FlameGraph output. Includes both **CPU profiling** and **memory allocation profiling**.

## Features

- **Low overhead** — <1% CPU at 10ms sampling, suitable for production
- **Mixed-mode profiling** — Capture Python and C extension frames together
- **Multi-threaded** — Automatic profiling of all Python threads
- **Memory-efficient** — Stack aggregation for long-running profiles
- **Memory profiling** — Statistical heap profiling with <0.1% overhead
- **Cross-platform** — Linux, macOS, Windows
- **Python 3.9–3.14** — Including free-threaded builds (Linux & macOS)
- **Zero dependencies** — No runtime requirements
@@ -112,6 +113,65 @@ print(f"Compression: {aggregated.compression_ratio:.1f}x")
aggregated.save("profile.json")
```

## Memory Profiling

spprof includes a statistical memory allocation profiler for tracking heap usage:

```python
import spprof.memprof as memprof

# Start memory profiling
memprof.start(sampling_rate_kb=512) # Sample ~every 512KB

# ... your code ...
import numpy as np
data = np.zeros((1000, 1000)) # ~8MB allocation

# Get heap snapshot
snapshot = memprof.get_snapshot()
print(f"Estimated heap: {snapshot.estimated_heap_bytes / 1e6:.1f} MB")

# Show top allocators
for site in snapshot.top_allocators(5):
    print(f" {site['function']}: {site['estimated_bytes'] / 1e6:.1f} MB")

memprof.stop()
```
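
The profiler is statistical: rather than recording every allocation, it samples roughly once per `sampling_rate_kb` of allocated bytes and scales the counts back up. The sketch below only illustrates that idea; the estimator spprof actually uses is not documented here, so treat the names and arithmetic as assumptions:

```python
# Illustrative rate-based estimate (an assumption about the internals,
# not spprof's actual code).
SAMPLING_RATE_BYTES = 512 * 1024  # one sample per ~512 KB allocated

def estimate_live_bytes(live_sample_count: int) -> int:
    # Each live sampled allocation stands in for ~SAMPLING_RATE_BYTES
    # of real allocations, so the estimate is a scaled count.
    return live_sample_count * SAMPLING_RATE_BYTES

print(f"{estimate_live_bytes(16) / 1e6:.1f} MB")  # 16 live samples ≈ 8.4 MB
```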

### Memory Profiler Features

- **Ultra-low overhead** — <0.1% CPU at default 512KB sampling rate
- **Complete coverage** — Captures allocations from Python, C extensions, and native libraries
- **Platform-native hooks** — `malloc_logger` on macOS, `LD_PRELOAD` on Linux
- **Speedscope output** — Visualize memory profiles at [speedscope.app](https://speedscope.app); see the example below
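
For example, a heap snapshot can be saved to a JSON file and dragged into [speedscope.app](https://www.speedscope.app). A minimal sketch, assuming any allocation-heavy `run_workload()` of your own; `save()` is the same method used in the context-manager example below:

```python
import spprof.memprof as memprof

memprof.start(sampling_rate_kb=512)
run_workload()                       # your allocation-heavy code
snapshot = memprof.get_snapshot()
snapshot.save("heap_profile.json")   # open this file at speedscope.app
memprof.stop()
```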

### Memory Context Manager

```python
with memprof.MemoryProfiler(sampling_rate_kb=256) as mp:
    run_workload()

mp.snapshot.save("memory_profile.json")
```
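
On exit the context manager stops profiling even if `run_workload()` raises. A rough hand-rolled equivalent using only the module-level functions shown above (a sketch, assuming `MemoryProfiler` adds no behavior beyond start/snapshot/stop):

```python
import spprof.memprof as memprof

memprof.start(sampling_rate_kb=256)
try:
    run_workload()
finally:
    snapshot = memprof.get_snapshot()
    memprof.stop()

snapshot.save("memory_profile.json")
```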

### Combined CPU + Memory Profiling

Both profilers run simultaneously without interference:

```python
import spprof
import spprof.memprof as memprof

spprof.start(interval_ms=10)
memprof.start(sampling_rate_kb=512)

# ... workload ...

cpu_profile = spprof.stop()
mem_snapshot = memprof.get_snapshot()
memprof.stop()
```
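
Both results can then be written out for Speedscope. This assumes the profile object returned by `spprof.stop()` exposes the same `save()` method as the aggregated profile shown earlier in this README:

```python
cpu_profile.save("cpu_profile.json")      # CPU time profile
mem_snapshot.save("memory_profile.json")  # heap allocation profile
```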

## Output Formats

### Speedscope (default)
213 changes: 213 additions & 0 deletions benchmarks/memory.py
@@ -162,3 +162,216 @@ def main():

if __name__ == "__main__":
    main()


# ============================================================================
# Memory Profiler Benchmarks (T119, T120)
# ============================================================================

def memprof_overhead_benchmark():
    """Benchmark memory profiler overhead at various sampling rates.

    Task T119: Performance benchmark at various sampling rates
    """
    import spprof.memprof as memprof

    print("\n" + "=" * 70)
    print("Memory Profiler Overhead Benchmark")
    print("=" * 70)

    def workload():
        """Mixed CPU/memory workload."""
        result = 0
        for i in range(100000):
            result += i ** 2
            if i % 100 == 0:
                data = bytearray(1024)
                del data
        return result

    # Baseline without profiler
    gc.collect()
    times = []
    for _ in range(5):
        start = time.perf_counter()
        workload()
        times.append(time.perf_counter() - start)
    baseline_time = sum(times) / len(times)
    print(f"\nBaseline (no profiler): {baseline_time*1000:.2f} ms")

    # Test various sampling rates
    rates = [64, 128, 256, 512, 1024]
    results = []

    for rate_kb in rates:
        gc.collect()

        # Reset module state
        memprof._initialized = False
        memprof._running = False
        memprof._shutdown = False

        times = []
        for _ in range(5):
            memprof.start(sampling_rate_kb=rate_kb)
            start = time.perf_counter()
            workload()
            elapsed = time.perf_counter() - start
            stats = memprof.get_stats()
            memprof.stop()
            memprof.shutdown()
            memprof._initialized = False
            memprof._running = False
            memprof._shutdown = False
            times.append(elapsed)

        avg_time = sum(times) / len(times)
        overhead = (avg_time - baseline_time) / baseline_time * 100

        results.append({
            "rate_kb": rate_kb,
            "avg_time_ms": avg_time * 1000,
            "overhead_pct": overhead,
            "samples": stats.total_samples if stats else 0,
        })

        print(f" {rate_kb:4d} KB rate: {avg_time*1000:.2f} ms "
              f"(overhead: {overhead:.3f}%, samples: {stats.total_samples if stats else 0})")

    print("\nResults:")
    print("-" * 50)
    print(f"{'Rate (KB)':>10} {'Time (ms)':>12} {'Overhead %':>12} {'Samples':>10}")
    print("-" * 50)
    for r in results:
        print(f"{r['rate_kb']:>10} {r['avg_time_ms']:>12.2f} "
              f"{r['overhead_pct']:>12.3f} {r['samples']:>10}")

    # Check target
    target_rate = 512
    for r in results:
        if r['rate_kb'] == target_rate:
            if r['overhead_pct'] < 0.1:
                print(f"\n✓ Target overhead (<0.1% at {target_rate}KB) ACHIEVED: {r['overhead_pct']:.3f}%")
            elif r['overhead_pct'] < 1.0:
                print(f"\n⚠ Target overhead (<0.1% at {target_rate}KB) not met: {r['overhead_pct']:.3f}%")
            else:
                print(f"\n✗ High overhead at {target_rate}KB: {r['overhead_pct']:.2f}%")

    return results


def memprof_footprint_benchmark():
    """Verify memory profiler footprint stays under 60MB.

    Task T120: Memory footprint verification (<60MB)
    """
    import resource
    import sys
    import spprof.memprof as memprof

    print("\n" + "=" * 70)
    print("Memory Profiler Footprint Benchmark")
    print("=" * 70)

    def get_rss_mb():
        """Get peak resident set size in MB."""
        usage = resource.getrusage(resource.RUSAGE_SELF)
        # ru_maxrss is reported in kilobytes on Linux but in bytes on macOS.
        if sys.platform == "darwin":
            return usage.ru_maxrss / (1024 * 1024)
        return usage.ru_maxrss / 1024

    # Baseline memory
    gc.collect()
    baseline_rss = get_rss_mb()
    print(f"\nBaseline RSS: {baseline_rss:.2f} MB")

    # Reset module state
    memprof._initialized = False
    memprof._running = False
    memprof._shutdown = False

    # Initialize profiler
    memprof.start(sampling_rate_kb=64)

    # Measure after initialization
    gc.collect()
    init_rss = get_rss_mb()
    print(f"After init RSS: {init_rss:.2f} MB")
    print(f"Init overhead: {init_rss - baseline_rss:.2f} MB")

    # Do lots of allocations to exercise data structures
    print("\nRunning workload with many allocations...")
    objects = []
    for i in range(10000):
        obj = bytearray(512)
        objects.append(obj)
        if i % 2 == 0:
            # Drop a reference so roughly half the buffers can be freed.
            objects[i // 2] = None

    # Measure after workload
    gc.collect()
    workload_rss = get_rss_mb()
    stats = memprof.get_stats()

    print(f"After workload RSS: {workload_rss:.2f} MB")
    print(f"Total overhead: {workload_rss - baseline_rss:.2f} MB")
    print(f"Samples: {stats.total_samples}")
    print(f"Heap map load: {stats.heap_map_load_percent:.2f}%")

    memprof.stop()
    memprof.shutdown()

    # Theoretical max footprint:
    # - Heap map: 1M entries × 24 bytes = 24 MB
    # - Stack table: 64K entries × 544 bytes = 35 MB
    # - Bloom filter: 128 KB
    # - Total: ~60 MB max
    theoretical_max = 60

    print(f"\nTheoretical max footprint: {theoretical_max} MB")
    actual_overhead = workload_rss - baseline_rss

    if actual_overhead < theoretical_max:
        print(f"✓ Memory footprint OK: {actual_overhead:.2f} MB < {theoretical_max} MB")
    else:
        print(f"⚠ Memory footprint high: {actual_overhead:.2f} MB >= {theoretical_max} MB")

    return {
        "baseline_mb": baseline_rss,
        "init_mb": init_rss,
        "workload_mb": workload_rss,
        "overhead_mb": actual_overhead,
        "target_mb": theoretical_max,
        "passed": actual_overhead < theoretical_max,
    }


def run_memprof_benchmarks():
    """Run all memory profiler benchmarks."""
    print("=" * 70)
    print("Memory Profiler Benchmarks")
    print("=" * 70)

    try:
        overhead_results = memprof_overhead_benchmark()
    except Exception as e:
        print(f"Overhead benchmark failed: {e}")
        overhead_results = None

    try:
        footprint_results = memprof_footprint_benchmark()
    except Exception as e:
        print(f"Footprint benchmark failed: {e}")
        footprint_results = None

    print("\n" + "=" * 70)
    print("Summary")
    print("=" * 70)

    if overhead_results:
        for r in overhead_results:
            if r['rate_kb'] == 512:
                print(f"Overhead at 512KB: {r['overhead_pct']:.3f}%")

    if footprint_results:
        print(f"Memory footprint: {footprint_results['overhead_mb']:.2f} MB "
              f"({'OK' if footprint_results['passed'] else 'HIGH'})")