Skip to content

Commit 69ad493

Browse files
shivasurya and claude authored
feat: Add core data structures for call graph (PR #1) (#322)
Add foundational data structures for Python call graph construction.

New Types:
- CallSite: Represents function call locations with arguments and resolution status
- CallGraph: Maps functions to callees with forward/reverse edges
- ModuleRegistry: Maps Python file paths to module paths
- ImportMap: Tracks imports per file for name resolution
- Location: Source code position tracking
- Argument: Function call argument metadata

Features:
- 100% test coverage with comprehensive unit tests
- Bidirectional call graph edges (forward and reverse)
- Support for ambiguous short names in module registry
- Helper functions for module path manipulation

This establishes the foundation for the 3-pass call graph algorithm:
- Pass 1 (next PR): Module registry builder
- Pass 2 (next PR): Import extraction and resolution
- Pass 3 (next PR): Call graph construction

Related: Phase 1 - Call Graph Construction & 3-Pass Algorithm

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude <noreply@anthropic.com>
1 parent a36e8e9 commit 69ad493

File tree

2 files changed

+835
-0
lines changed

2 files changed

+835
-0
lines changed
Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
package callgraph
2+
3+
import (
4+
"github.com/shivasurya/code-pathfinder/sourcecode-parser/graph"
5+
)
6+
7+
// Location identifies a position in a source file. It is used to record
// precisely where a call site occurs in the analysed code.
type Location struct {
	File   string // Absolute path to the source file
	Line   int    // Line number (1-indexed)
	Column int    // Column number (1-indexed)
}
14+
15+
// CallSite represents a function/method call location in the source code.
16+
// It captures both the syntactic information (where the call is) and
17+
// semantic information (what is being called and with what arguments).
18+
type CallSite struct {
19+
Target string // The name of the function being called (e.g., "eval", "utils.sanitize")
20+
Location Location // Where this call occurs in the source code
21+
Arguments []Argument // Arguments passed to the call
22+
Resolved bool // Whether we successfully resolved this call to a definition
23+
TargetFQN string // Fully qualified name after resolution (e.g., "myapp.utils.sanitize")
24+
}
25+
26+
// Argument describes a single argument passed at a call site: the expression
// text plus metadata about it.
type Argument struct {
	Value      string // The argument expression as a string
	IsVariable bool   // Whether this argument is a variable reference
	Position   int    // Position in the argument list (0-indexed)
}
33+
34+
// CallGraph represents the complete call graph of a program.
35+
// It maps function definitions to their call sites and provides
36+
// both forward (callers → callees) and reverse (callees → callers) edges.
37+
//
38+
// Example:
39+
// Function A calls B and C
40+
// edges: {"A": ["B", "C"]}
41+
// reverseEdges: {"B": ["A"], "C": ["A"]}
42+
type CallGraph struct {
43+
// Forward edges: maps fully qualified function name to list of functions it calls
44+
// Key: caller FQN (e.g., "myapp.views.get_user")
45+
// Value: list of callee FQNs (e.g., ["myapp.db.query", "myapp.utils.sanitize"])
46+
Edges map[string][]string
47+
48+
// Reverse edges: maps fully qualified function name to list of functions that call it
49+
// Useful for backward slicing and finding all callers of a function
50+
// Key: callee FQN
51+
// Value: list of caller FQNs
52+
ReverseEdges map[string][]string
53+
54+
// Detailed call site information for each function
55+
// Key: caller FQN
56+
// Value: list of all call sites within that function
57+
CallSites map[string][]CallSite
58+
59+
// Map from fully qualified name to the actual function node in the graph
60+
// This allows quick lookup of function metadata (line number, file, etc.)
61+
Functions map[string]*graph.Node
62+
}
63+
64+
// NewCallGraph creates and initializes a new CallGraph instance.
65+
// All maps are pre-allocated to avoid nil pointer issues.
66+
func NewCallGraph() *CallGraph {
67+
return &CallGraph{
68+
Edges: make(map[string][]string),
69+
ReverseEdges: make(map[string][]string),
70+
CallSites: make(map[string][]CallSite),
71+
Functions: make(map[string]*graph.Node),
72+
}
73+
}
74+
75+
// AddEdge adds a directed edge from caller to callee in the call graph.
76+
// Automatically updates both forward and reverse edges.
77+
//
78+
// Parameters:
79+
// - caller: fully qualified name of the calling function
80+
// - callee: fully qualified name of the called function
81+
func (cg *CallGraph) AddEdge(caller, callee string) {
82+
// Add forward edge
83+
if !contains(cg.Edges[caller], callee) {
84+
cg.Edges[caller] = append(cg.Edges[caller], callee)
85+
}
86+
87+
// Add reverse edge
88+
if !contains(cg.ReverseEdges[callee], caller) {
89+
cg.ReverseEdges[callee] = append(cg.ReverseEdges[callee], caller)
90+
}
91+
}
92+
93+
// AddCallSite adds a call site to the call graph.
94+
// This stores detailed information about where and how a function is called.
95+
//
96+
// Parameters:
97+
// - caller: fully qualified name of the calling function
98+
// - callSite: detailed information about the call
99+
func (cg *CallGraph) AddCallSite(caller string, callSite CallSite) {
100+
cg.CallSites[caller] = append(cg.CallSites[caller], callSite)
101+
}
102+
103+
// GetCallers returns all functions that call the specified function.
104+
// Uses the reverse edges for efficient lookup.
105+
//
106+
// Parameters:
107+
// - callee: fully qualified name of the function
108+
//
109+
// Returns:
110+
// - list of caller FQNs, or empty slice if no callers found
111+
func (cg *CallGraph) GetCallers(callee string) []string {
112+
if callers, ok := cg.ReverseEdges[callee]; ok {
113+
return callers
114+
}
115+
return []string{}
116+
}
117+
118+
// GetCallees returns all functions called by the specified function.
119+
// Uses the forward edges for efficient lookup.
120+
//
121+
// Parameters:
122+
// - caller: fully qualified name of the function
123+
//
124+
// Returns:
125+
// - list of callee FQNs, or empty slice if no callees found
126+
func (cg *CallGraph) GetCallees(caller string) []string {
127+
if callees, ok := cg.Edges[caller]; ok {
128+
return callees
129+
}
130+
return []string{}
131+
}
132+
133+
// ModuleRegistry maintains the mapping between Python module paths and the
// files that define them. This is essential for resolving imports and
// building fully qualified names.
//
// Example:
//
//	File:   /project/myapp/utils/helpers.py
//	Module: myapp.utils.helpers
type ModuleRegistry struct {
	// Modules maps a fully qualified module path to its absolute file path.
	//
	//	Key:   "myapp.utils.helpers"
	//	Value: "/absolute/path/to/myapp/utils/helpers.py"
	Modules map[string]string

	// ShortNames maps a short module name (the last dotted component) to
	// every matching file path, so ambiguous names keep all candidates.
	//
	//	Key:   "helpers"
	//	Value: ["/path/to/myapp/utils/helpers.py", "/path/to/lib/helpers.py"]
	ShortNames map[string][]string

	// ResolvedImports caches resolved imports to avoid redundant lookups.
	//
	//	Key:   import string (e.g., "utils.helpers")
	//	Value: fully qualified module path
	ResolvedImports map[string]string
}
155+
156+
// NewModuleRegistry creates and initializes a new ModuleRegistry instance.
157+
func NewModuleRegistry() *ModuleRegistry {
158+
return &ModuleRegistry{
159+
Modules: make(map[string]string),
160+
ShortNames: make(map[string][]string),
161+
ResolvedImports: make(map[string]string),
162+
}
163+
}
164+
165+
// AddModule registers a module in the registry.
166+
// Automatically indexes both the full module path and the short name.
167+
//
168+
// Parameters:
169+
// - modulePath: fully qualified module path (e.g., "myapp.utils.helpers")
170+
// - filePath: absolute file path (e.g., "/project/myapp/utils/helpers.py")
171+
func (mr *ModuleRegistry) AddModule(modulePath, filePath string) {
172+
mr.Modules[modulePath] = filePath
173+
174+
// Extract short name (last component)
175+
// "myapp.utils.helpers" → "helpers"
176+
shortName := extractShortName(modulePath)
177+
if !containsString(mr.ShortNames[shortName], filePath) {
178+
mr.ShortNames[shortName] = append(mr.ShortNames[shortName], filePath)
179+
}
180+
}
181+
182+
// GetModulePath returns the file path for a given module, if it exists.
183+
//
184+
// Parameters:
185+
// - modulePath: fully qualified module path
186+
//
187+
// Returns:
188+
// - file path and true if found, empty string and false otherwise
189+
func (mr *ModuleRegistry) GetModulePath(modulePath string) (string, bool) {
190+
filePath, ok := mr.Modules[modulePath]
191+
return filePath, ok
192+
}
193+
194+
// ImportMap represents the import statements of a single Python file. It
// maps each local alias to the fully qualified path it refers to.
//
// Example:
//
//	File contains: from myapp.utils import sanitize as clean
//	Imports:       {"clean": "myapp.utils.sanitize"}
type ImportMap struct {
	FilePath string            // Absolute path to the file containing these imports
	Imports  map[string]string // Maps alias/name to fully qualified module path
}
204+
205+
// NewImportMap creates and initializes a new ImportMap instance.
206+
func NewImportMap(filePath string) *ImportMap {
207+
return &ImportMap{
208+
FilePath: filePath,
209+
Imports: make(map[string]string),
210+
}
211+
}
212+
213+
// AddImport adds an import mapping to the import map.
214+
//
215+
// Parameters:
216+
// - alias: the local name used in the file (e.g., "clean", "sanitize", "utils")
217+
// - fqn: the fully qualified name (e.g., "myapp.utils.sanitize")
218+
func (im *ImportMap) AddImport(alias, fqn string) {
219+
im.Imports[alias] = fqn
220+
}
221+
222+
// Resolve looks up the fully qualified name for a local alias.
223+
//
224+
// Parameters:
225+
// - alias: the local name to resolve
226+
//
227+
// Returns:
228+
// - fully qualified name and true if found, empty string and false otherwise
229+
func (im *ImportMap) Resolve(alias string) (string, bool) {
230+
fqn, ok := im.Imports[alias]
231+
return fqn, ok
232+
}
233+
234+
// contains reports whether item is an element of slice, using a linear scan
// with exact string comparison. A nil or empty slice contains nothing.
func contains(slice []string, item string) bool {
	for _, s := range slice {
		if s == item {
			return true
		}
	}
	return false
}
243+
244+
// Helper function alias for consistency.
245+
func containsString(slice []string, item string) bool {
246+
return contains(slice, item)
247+
}
248+
249+
// extractShortName returns the last component of a dotted path, i.e. the
// text after the final '.'.
//
// Example: "myapp.utils.helpers" → "helpers".
//
// A path without any dot is returned unchanged, and a path ending in a dot
// yields the empty string.
func extractShortName(modulePath string) string {
	// Scan backwards for the last dot.
	for i := len(modulePath) - 1; i >= 0; i-- {
		if modulePath[i] == '.' {
			return modulePath[i+1:]
		}
	}
	return modulePath
}

0 commit comments

Comments
 (0)