Skip to content

Commit d1de077

Browse files
Extract module name utilities to module_utils.py
Move module name extraction logic from heatmap_collector to shared module_utils to enable flamegraph to display module names instead of full file paths.
1 parent 1eff27f commit d1de077

File tree

2 files changed

+123
-120
lines changed

2 files changed

+123
-120
lines changed

Lib/profiling/sampling/heatmap_collector.py

Lines changed: 1 addition & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from .collector import normalize_location, extract_lineno
2121
from .opcode_utils import get_opcode_info, format_opcode
2222
from .stack_collector import StackTraceCollector
23+
from .module_utils import extract_module_name, get_python_path_info
2324

2425

2526
# ============================================================================
@@ -49,126 +50,6 @@ class TreeNode:
4950
children: Dict[str, 'TreeNode'] = field(default_factory=dict)
5051

5152

52-
# ============================================================================
53-
# Module Path Analysis
54-
# ============================================================================
55-
56-
def get_python_path_info():
57-
"""Get information about Python installation paths for module extraction.
58-
59-
Returns:
60-
dict: Dictionary containing stdlib path, site-packages paths, and sys.path entries.
61-
"""
62-
info = {
63-
'stdlib': None,
64-
'site_packages': [],
65-
'sys_path': []
66-
}
67-
68-
# Get standard library path from os module location
69-
try:
70-
if hasattr(os, '__file__') and os.__file__:
71-
info['stdlib'] = Path(os.__file__).parent
72-
except (AttributeError, OSError):
73-
pass # Silently continue if we can't determine stdlib path
74-
75-
# Get site-packages directories
76-
site_packages = []
77-
try:
78-
site_packages.extend(Path(p) for p in site.getsitepackages())
79-
except (AttributeError, OSError):
80-
pass # Continue without site packages if unavailable
81-
82-
# Get user site-packages
83-
try:
84-
user_site = site.getusersitepackages()
85-
if user_site and Path(user_site).exists():
86-
site_packages.append(Path(user_site))
87-
except (AttributeError, OSError):
88-
pass # Continue without user site packages
89-
90-
info['site_packages'] = site_packages
91-
info['sys_path'] = [Path(p) for p in sys.path if p]
92-
93-
return info
94-
95-
96-
def extract_module_name(filename, path_info):
97-
"""Extract Python module name and type from file path.
98-
99-
Args:
100-
filename: Path to the Python file
101-
path_info: Dictionary from get_python_path_info()
102-
103-
Returns:
104-
tuple: (module_name, module_type) where module_type is one of:
105-
'stdlib', 'site-packages', 'project', or 'other'
106-
"""
107-
if not filename:
108-
return ('unknown', 'other')
109-
110-
try:
111-
file_path = Path(filename)
112-
except (ValueError, OSError):
113-
return (str(filename), 'other')
114-
115-
# Check if it's in stdlib
116-
if path_info['stdlib'] and _is_subpath(file_path, path_info['stdlib']):
117-
try:
118-
rel_path = file_path.relative_to(path_info['stdlib'])
119-
return (_path_to_module(rel_path), 'stdlib')
120-
except ValueError:
121-
pass
122-
123-
# Check site-packages
124-
for site_pkg in path_info['site_packages']:
125-
if _is_subpath(file_path, site_pkg):
126-
try:
127-
rel_path = file_path.relative_to(site_pkg)
128-
return (_path_to_module(rel_path), 'site-packages')
129-
except ValueError:
130-
continue
131-
132-
# Check other sys.path entries (project files)
133-
if not str(file_path).startswith(('<', '[')): # Skip special files
134-
for path_entry in path_info['sys_path']:
135-
if _is_subpath(file_path, path_entry):
136-
try:
137-
rel_path = file_path.relative_to(path_entry)
138-
return (_path_to_module(rel_path), 'project')
139-
except ValueError:
140-
continue
141-
142-
# Fallback: just use the filename
143-
return (_path_to_module(file_path), 'other')
144-
145-
146-
def _is_subpath(file_path, parent_path):
147-
try:
148-
file_path.relative_to(parent_path)
149-
return True
150-
except (ValueError, OSError):
151-
return False
152-
153-
154-
def _path_to_module(path):
155-
if isinstance(path, str):
156-
path = Path(path)
157-
158-
# Remove .py extension
159-
if path.suffix == '.py':
160-
path = path.with_suffix('')
161-
162-
# Convert path separators to dots
163-
parts = path.parts
164-
165-
# Handle __init__ files - they represent the package itself
166-
if parts and parts[-1] == '__init__':
167-
parts = parts[:-1]
168-
169-
return '.'.join(parts) if parts else path.stem
170-
171-
17253
# ============================================================================
17354
# Helper Classes
17455
# ============================================================================
Lib/profiling/sampling/module_utils.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
"""Utilities for extracting module names from file paths."""
2+
3+
import os
4+
import site
5+
import sys
6+
from pathlib import Path
7+
8+
9+
def get_python_path_info():
    """Collect the directories used to classify source files by origin.

    Returns:
        dict: Three entries, in this order:
            'stdlib': parent directory of the ``os`` module's file as a
                Path, or None when it cannot be determined.
            'site_packages': list of Path objects for the global
                site-packages directories, followed by the user
                site-packages directory when it exists on disk.
            'sys_path': list of Path objects, one per non-empty entry
                of ``sys.path``.
    """
    # The standard library is located through the os module's own file.
    stdlib_dir = None
    try:
        os_source = getattr(os, '__file__', None)
        if os_source:
            stdlib_dir = Path(os_source).parent
    except (AttributeError, OSError):
        pass  # Best effort: leave the stdlib location unknown

    # Global site-packages directories.
    package_dirs = []
    try:
        for entry in site.getsitepackages():
            package_dirs.append(Path(entry))
    except (AttributeError, OSError):
        pass  # Best effort: carry on without them

    # The per-user site-packages directory is only kept if it exists.
    try:
        user_dir = site.getusersitepackages()
        if user_dir and Path(user_dir).exists():
            package_dirs.append(Path(user_dir))
    except (AttributeError, OSError):
        pass  # Best effort: carry on without it

    return {
        'stdlib': stdlib_dir,
        'site_packages': package_dirs,
        'sys_path': [Path(entry) for entry in sys.path if entry],
    }
47+
48+
49+
def extract_module_name(filename, path_info):
    """Extract Python module name and type from file path.

    Locations are tried from most to least specific: site-packages
    directories, then the standard library, then the remaining sys.path
    entries. The first directory that contains the file decides both the
    dotted module name (relative to that directory) and the module type.

    Args:
        filename: Path to the Python file
        path_info: Dictionary from get_python_path_info()

    Returns:
        tuple: (module_name, module_type) where module_type is one of:
            'stdlib', 'site-packages', 'project', or 'other'
    """
    if not filename:
        return ('unknown', 'other')

    try:
        file_path = Path(filename)
    except (TypeError, ValueError, OSError):
        # Not something pathlib can represent (e.g. bytes): report it as-is.
        return (str(filename), 'other')

    # Check site-packages before the stdlib: in the default POSIX layout
    # site-packages is nested inside the stdlib directory, so testing the
    # stdlib first would label third-party code as
    # ('site-packages.pkg.mod', 'stdlib').
    for site_pkg in path_info['site_packages']:
        module_name = _relative_module(file_path, site_pkg)
        if module_name is not None:
            return (module_name, 'site-packages')

    # Check if it's in stdlib
    stdlib_dir = path_info['stdlib']
    if stdlib_dir:
        module_name = _relative_module(file_path, stdlib_dir)
        if module_name is not None:
            return (module_name, 'stdlib')

    # Check other sys.path entries (project files)
    if not str(file_path).startswith(('<', '[')):  # Skip special files
        for path_entry in path_info['sys_path']:
            module_name = _relative_module(file_path, path_entry)
            if module_name is not None:
                return (module_name, 'project')

    # Fallback: derive a name from the path itself
    return (_path_to_module(file_path), 'other')


def _relative_module(file_path, base_dir):
    """Return the module name of file_path relative to base_dir, or None."""
    try:
        rel_path = file_path.relative_to(base_dir)
    except (ValueError, OSError):
        # relative_to() raises ValueError when file_path is not inside base_dir.
        return None
    return _path_to_module(rel_path)


def _is_subpath(file_path, parent_path):
    """Return True when file_path is located inside parent_path."""
    try:
        file_path.relative_to(parent_path)
        return True
    except (ValueError, OSError):
        return False


def _path_to_module(path):
    """Convert a file path (str or Path) into a dotted module name."""
    if isinstance(path, str):
        path = Path(path)

    # Remove .py extension
    if path.suffix == '.py':
        path = path.with_suffix('')

    # Convert path separators to dots
    parts = path.parts

    # Drop the root/drive anchor of absolute paths; otherwise '/tmp/x.py'
    # would be rendered as '/.tmp.x'.
    if path.anchor and parts and parts[0] == path.anchor:
        parts = parts[1:]

    # Handle __init__ files - they represent the package itself
    if parts and parts[-1] == '__init__':
        parts = parts[:-1]

    return '.'.join(parts) if parts else path.stem

0 commit comments

Comments
 (0)