diff --git a/sourcecode-parser/graph/callgraph/builder.go b/sourcecode-parser/graph/callgraph/builder.go new file mode 100644 index 00000000..9a3e05d9 --- /dev/null +++ b/sourcecode-parser/graph/callgraph/builder.go @@ -0,0 +1,321 @@ +package callgraph + +import ( + "os" + "path/filepath" + "strings" + + "github.com/shivasurya/code-pathfinder/sourcecode-parser/graph" +) + +// BuildCallGraph constructs the complete call graph for a Python project. +// This is Pass 3 of the 3-pass algorithm: +// - Pass 1: BuildModuleRegistry - map files to modules +// - Pass 2: ExtractImports + ExtractCallSites - parse imports and calls +// - Pass 3: BuildCallGraph - resolve calls and build graph +// +// Algorithm: +// 1. For each Python file in the project: +// a. Extract imports to build ImportMap +// b. Extract call sites from AST +// c. Extract function definitions from main graph +// 2. For each call site: +// a. Resolve target name using ImportMap +// b. Find target function definition in registry +// c. Add edge from caller to callee +// d. Store detailed call site information +// +// Parameters: +// - codeGraph: the existing code graph with parsed AST nodes +// - registry: module registry mapping files to modules +// - projectRoot: absolute path to project root +// +// Returns: +// - CallGraph: complete call graph with edges and call sites +// - error: if any step fails +// +// Example: +// Given: +// File: myapp/views.py +// def get_user(): +// sanitize(data) # call to myapp.utils.sanitize +// +// Creates: +// edges: {"myapp.views.get_user": ["myapp.utils.sanitize"]} +// reverseEdges: {"myapp.utils.sanitize": ["myapp.views.get_user"]} +// callSites: {"myapp.views.get_user": [CallSite{Target: "sanitize", ...}]} +func BuildCallGraph(codeGraph *graph.CodeGraph, registry *ModuleRegistry, projectRoot string) (*CallGraph, error) { + callGraph := NewCallGraph() + + // First, index all function definitions from the code graph + // This builds the Functions map for quick lookup + indexFunctions(codeGraph, callGraph, registry) + + // Process each Python file in the project + for modulePath, filePath := range registry.Modules { + // Skip non-Python files + if !strings.HasSuffix(filePath, ".py") { + continue + } + + // Read source code for parsing + sourceCode, err := readFileBytes(filePath) + if err != nil { + // Skip files we can't read + continue + } + + // Extract imports to build ImportMap for this file + importMap, err := ExtractImports(filePath, sourceCode, registry) + if err != nil { + // Skip files with import errors + continue + } + + // Extract all call sites from this file + callSites, err := ExtractCallSites(filePath, sourceCode, importMap) + if err != nil { + // Skip files with call site extraction errors + continue + } + + // Get all function definitions in this file + fileFunctions := getFunctionsInFile(codeGraph, filePath) + + // Process each call site to resolve targets and build edges + for _, callSite := range callSites { + // Find the caller function containing this call site + callerFQN := findContainingFunction(callSite.Location, fileFunctions, modulePath) + if callerFQN == "" { + // Call at module level - use module name as caller + callerFQN = modulePath + } + + // Resolve the call target to a fully qualified name + targetFQN, resolved := resolveCallTarget(callSite.Target, importMap, registry, modulePath) + + // Update call site with resolution information + callSite.TargetFQN = targetFQN + callSite.Resolved = resolved + + // Add call site to graph (dereference pointer) + callGraph.AddCallSite(callerFQN, *callSite) + + // Add edge if we successfully resolved the target + if resolved { + callGraph.AddEdge(callerFQN, targetFQN) + } + } + } + + return callGraph, nil +} + +// indexFunctions builds the Functions map in the call graph. +// Extracts all function definitions from the code graph and maps them by FQN. +// +// Parameters: +// - codeGraph: the parsed code graph +// - callGraph: the call graph being built +// - registry: module registry for resolving file paths to modules +func indexFunctions(codeGraph *graph.CodeGraph, callGraph *CallGraph, registry *ModuleRegistry) { + for _, node := range codeGraph.Nodes { + // Only index function/method definitions + if node.Type != "method_declaration" && node.Type != "function_definition" { + continue + } + + // Get the module path for this function's file + modulePath, ok := registry.FileToModule[node.File] + if !ok { + continue + } + + // Build fully qualified name: module.function + fqn := modulePath + "." + node.Name + callGraph.Functions[fqn] = node + } +} + +// getFunctionsInFile returns all function definitions in a specific file. +// +// Parameters: +// - codeGraph: the parsed code graph +// - filePath: absolute path to the file +// +// Returns: +// - List of function/method nodes in the file, sorted by line number +func getFunctionsInFile(codeGraph *graph.CodeGraph, filePath string) []*graph.Node { + var functions []*graph.Node + + for _, node := range codeGraph.Nodes { + if node.File == filePath && + (node.Type == "method_declaration" || node.Type == "function_definition") { + functions = append(functions, node) + } + } + + return functions +} + +// findContainingFunction finds the function that contains a given call site location. +// Uses line numbers to determine which function a call belongs to. +// +// Algorithm: +// 1. Iterate through all functions in the file +// 2. Find function with the highest line number that's still <= call line +// 3. Return the FQN of that function +// +// Parameters: +// - location: source location of the call site +// - functions: all function definitions in the file +// - modulePath: module path of the file +// +// Returns: +// - Fully qualified name of the containing function, or empty if not found +func findContainingFunction(location Location, functions []*graph.Node, modulePath string) string { + var bestMatch *graph.Node + var bestLine uint32 + + for _, fn := range functions { + // Check if call site is after this function definition + if uint32(location.Line) >= fn.LineNumber { + // Keep track of the closest preceding function + if bestMatch == nil || fn.LineNumber > bestLine { + bestMatch = fn + bestLine = fn.LineNumber + } + } + } + + if bestMatch != nil { + return modulePath + "." + bestMatch.Name + } + + return "" +} + +// resolveCallTarget resolves a call target name to a fully qualified name. +// This is the core resolution logic that handles: +// - Direct function calls: sanitize() → myapp.utils.sanitize +// - Method calls: obj.method() → (unresolved, needs type inference) +// - Imported functions: from utils import sanitize; sanitize() → myapp.utils.sanitize +// - Qualified calls: utils.sanitize() → myapp.utils.sanitize +// +// Algorithm: +// 1. Check if target is a simple name (no dots) +// a. Look up in import map +// b. If found, return FQN from import +// c. If not found, try to find in same module +// 2. If target has dots (qualified name) +// a. Split into base and rest +// b. Resolve base using import map +// c. Append rest to get full FQN +// 3. If all else fails, check if it exists in the registry +// +// Parameters: +// - target: the call target name (e.g., "sanitize", "utils.sanitize", "obj.method") +// - importMap: import mappings for the current file +// - registry: module registry for validation +// - currentModule: the module containing this call +// +// Returns: +// - Fully qualified name of the target +// - Boolean indicating if resolution was successful +// +// Examples: +// target="sanitize", imports={"sanitize": "myapp.utils.sanitize"} +// → "myapp.utils.sanitize", true +// +// target="utils.sanitize", imports={"utils": "myapp.utils"} +// → "myapp.utils.sanitize", true +// +// target="obj.method", imports={} +// → "obj.method", false (needs type inference) +func resolveCallTarget(target string, importMap *ImportMap, registry *ModuleRegistry, currentModule string) (string, bool) { + // Handle simple names (no dots) + if !strings.Contains(target, ".") { + // Try to resolve through imports + if fqn, ok := importMap.Resolve(target); ok { + // Found in imports - return the FQN + // Validate if it exists in registry + resolved := validateFQN(fqn, registry) + return fqn, resolved + } + + // Not in imports - might be in same module + sameLevelFQN := currentModule + "." + target + if validateFQN(sameLevelFQN, registry) { + return sameLevelFQN, true + } + + // Can't resolve - return as-is + return target, false + } + + // Handle qualified names (with dots) + parts := strings.SplitN(target, ".", 2) + base := parts[0] + rest := parts[1] + + // Try to resolve base through imports + if baseFQN, ok := importMap.Resolve(base); ok { + fullFQN := baseFQN + "." + rest + if validateFQN(fullFQN, registry) { + return fullFQN, true + } + return fullFQN, false + } + + // Base not in imports - might be module-level access + // Try current module + fullFQN := currentModule + "." + target + if validateFQN(fullFQN, registry) { + return fullFQN, true + } + + // Can't resolve - return as-is + return target, false +} + +// validateFQN checks if a fully qualified name exists in the registry. +// Handles both module names and function names within modules. +// +// Examples: +// "myapp.utils" - checks if module exists +// "myapp.utils.sanitize" - checks if module "myapp.utils" exists +// +// Parameters: +// - fqn: fully qualified name to validate +// - registry: module registry +// +// Returns: +// - true if FQN is valid (module or function in existing module) +func validateFQN(fqn string, registry *ModuleRegistry) bool { + // Check if it's a module + if _, ok := registry.Modules[fqn]; ok { + return true + } + + // Check if parent module exists (for functions) + // "myapp.utils.sanitize" → check if "myapp.utils" exists + lastDot := strings.LastIndex(fqn, ".") + if lastDot > 0 { + parentModule := fqn[:lastDot] + if _, ok := registry.Modules[parentModule]; ok { + return true + } + } + + return false +} + +// readFileBytes reads a file and returns its contents as a byte slice. +// Helper function for reading source code. +func readFileBytes(filePath string) ([]byte, error) { + absPath, err := filepath.Abs(filePath) + if err != nil { + return nil, err + } + return os.ReadFile(absPath) +} diff --git a/sourcecode-parser/graph/callgraph/builder_test.go b/sourcecode-parser/graph/callgraph/builder_test.go new file mode 100644 index 00000000..a1644c69 --- /dev/null +++ b/sourcecode-parser/graph/callgraph/builder_test.go @@ -0,0 +1,449 @@ +package callgraph + +import ( + "os" + "path/filepath" + "testing" + + "github.com/shivasurya/code-pathfinder/sourcecode-parser/graph" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestResolveCallTarget_SimpleImportedFunction(t *testing.T) { + // Test resolving a simple imported function name + // from myapp.utils import sanitize + // sanitize() → myapp.utils.sanitize + + registry := NewModuleRegistry() + registry.AddModule("myapp.utils", "/project/myapp/utils.py") + registry.AddModule("myapp.views", "/project/myapp/views.py") + + importMap := NewImportMap("/project/myapp/views.py") + importMap.AddImport("sanitize", "myapp.utils.sanitize") + + fqn, resolved := resolveCallTarget("sanitize", importMap, registry, "myapp.views") + + assert.True(t, resolved) + assert.Equal(t, "myapp.utils.sanitize", fqn) +} + +func TestResolveCallTarget_QualifiedImport(t *testing.T) { + // Test resolving a qualified call through imported module + // import myapp.utils as utils + // utils.sanitize() → myapp.utils.sanitize + + registry := NewModuleRegistry() + registry.AddModule("myapp.utils", "/project/myapp/utils.py") + registry.AddModule("myapp.views", "/project/myapp/views.py") + + importMap := NewImportMap("/project/myapp/views.py") + importMap.AddImport("utils", "myapp.utils") + + fqn, resolved := resolveCallTarget("utils.sanitize", importMap, registry, "myapp.views") + + assert.True(t, resolved) + assert.Equal(t, "myapp.utils.sanitize", fqn) +} + +func TestResolveCallTarget_SameModuleFunction(t *testing.T) { + // Test resolving a function in the same module + // No imports needed - just local function call + + registry := NewModuleRegistry() + registry.AddModule("myapp.views", "/project/myapp/views.py") + + importMap := NewImportMap("/project/myapp/views.py") + + fqn, resolved := resolveCallTarget("helper", importMap, registry, "myapp.views") + + assert.True(t, resolved) + assert.Equal(t, "myapp.views.helper", fqn) +} + +func TestResolveCallTarget_UnresolvedMethodCall(t *testing.T) { + // Test that method calls on objects are marked as unresolved + // obj.method() → can't resolve without type inference + + registry := NewModuleRegistry() + registry.AddModule("myapp.views", "/project/myapp/views.py") + + importMap := NewImportMap("/project/myapp/views.py") + + fqn, resolved := resolveCallTarget("obj.method", importMap, registry, "myapp.views") + + assert.False(t, resolved) + assert.Equal(t, "obj.method", fqn) +} + +func TestResolveCallTarget_NonExistentFunction(t *testing.T) { + // Test resolving a function that doesn't exist in registry + + registry := NewModuleRegistry() + registry.AddModule("myapp.views", "/project/myapp/views.py") + + importMap := NewImportMap("/project/myapp/views.py") + importMap.AddImport("missing", "nonexistent.module.function") + + fqn, resolved := resolveCallTarget("missing", importMap, registry, "myapp.views") + + assert.False(t, resolved) + assert.Equal(t, "nonexistent.module.function", fqn) +} + +func TestValidateFQN_ModuleExists(t *testing.T) { + registry := NewModuleRegistry() + registry.AddModule("myapp.utils", "/project/myapp/utils.py") + + valid := validateFQN("myapp.utils", registry) + assert.True(t, valid) +} + +func TestValidateFQN_FunctionInModule(t *testing.T) { + registry := NewModuleRegistry() + registry.AddModule("myapp.utils", "/project/myapp/utils.py") + + // Even though "myapp.utils.sanitize" isn't explicitly registered, + // it's valid because parent module "myapp.utils" exists + valid := validateFQN("myapp.utils.sanitize", registry) + assert.True(t, valid) +} + +func TestValidateFQN_NonExistent(t *testing.T) { + registry := NewModuleRegistry() + registry.AddModule("myapp.utils", "/project/myapp/utils.py") + + valid := validateFQN("nonexistent.module", registry) + assert.False(t, valid) +} + +func TestIndexFunctions(t *testing.T) { + // Test indexing function definitions from code graph + + registry := NewModuleRegistry() + registry.AddModule("myapp.views", "/project/myapp/views.py") + registry.AddModule("myapp.utils", "/project/myapp/utils.py") + + codeGraph := &graph.CodeGraph{ + Nodes: map[string]*graph.Node{ + "node1": { + ID: "node1", + Type: "function_definition", + Name: "get_user", + File: "/project/myapp/views.py", + LineNumber: 10, + }, + "node2": { + ID: "node2", + Type: "function_definition", + Name: "sanitize", + File: "/project/myapp/utils.py", + LineNumber: 5, + }, + "node3": { + ID: "node3", + Type: "class_declaration", + Name: "MyClass", + File: "/project/myapp/views.py", + }, + }, + } + + callGraph := NewCallGraph() + indexFunctions(codeGraph, callGraph, registry) + + // Should have indexed both functions + assert.Len(t, callGraph.Functions, 2) + assert.NotNil(t, callGraph.Functions["myapp.views.get_user"]) + assert.NotNil(t, callGraph.Functions["myapp.utils.sanitize"]) + // Should not index class declaration + assert.Nil(t, callGraph.Functions["myapp.views.MyClass"]) +} + +func TestGetFunctionsInFile(t *testing.T) { + codeGraph := &graph.CodeGraph{ + Nodes: map[string]*graph.Node{ + "node1": { + ID: "node1", + Type: "function_definition", + Name: "func1", + File: "/project/file1.py", + LineNumber: 10, + }, + "node2": { + ID: "node2", + Type: "function_definition", + Name: "func2", + File: "/project/file1.py", + LineNumber: 20, + }, + "node3": { + ID: "node3", + Type: "function_definition", + Name: "func3", + File: "/project/file2.py", + LineNumber: 5, + }, + }, + } + + functions := getFunctionsInFile(codeGraph, "/project/file1.py") + + assert.Len(t, functions, 2) + names := []string{functions[0].Name, functions[1].Name} + assert.Contains(t, names, "func1") + assert.Contains(t, names, "func2") +} + +func TestFindContainingFunction(t *testing.T) { + functions := []*graph.Node{ + { + Name: "func1", + LineNumber: 10, + }, + { + Name: "func2", + LineNumber: 30, + }, + } + + tests := []struct { + name string + callLine int + expectedFQN string + expectedEmpty bool + }{ + { + name: "Call before any function", + callLine: 5, + expectedEmpty: true, + }, + { + name: "Call in first function", + callLine: 15, + expectedFQN: "myapp.func1", + }, + { + name: "Call in second function", + callLine: 35, + expectedFQN: "myapp.func2", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + location := Location{Line: tt.callLine} + fqn := findContainingFunction(location, functions, "myapp") + + if tt.expectedEmpty { + assert.Empty(t, fqn) + } else { + assert.Equal(t, tt.expectedFQN, fqn) + } + }) + } +} + +func TestBuildCallGraph_SimpleCase(t *testing.T) { + // Test building a simple call graph with one file and one function call + + // Create a temporary test fixture + tmpDir := t.TempDir() + viewsFile := filepath.Join(tmpDir, "views.py") + + sourceCode := []byte(` +def get_user(): + sanitize(data) +`) + + err := os.WriteFile(viewsFile, sourceCode, 0644) + require.NoError(t, err) + + // Build module registry + registry := NewModuleRegistry() + registry.AddModule("views", viewsFile) + + // Create a minimal code graph with function definition + codeGraph := &graph.CodeGraph{ + Nodes: map[string]*graph.Node{ + "node1": { + ID: "node1", + Type: "function_definition", + Name: "get_user", + File: viewsFile, + LineNumber: 2, + }, + "node2": { + ID: "node2", + Type: "function_definition", + Name: "sanitize", + File: viewsFile, + LineNumber: 10, // Hypothetical + }, + }, + } + + // Build call graph + callGraph, err := BuildCallGraph(codeGraph, registry, tmpDir) + + require.NoError(t, err) + require.NotNil(t, callGraph) + + // Verify call sites were extracted + assert.NotEmpty(t, callGraph.CallSites) + + // Verify functions were indexed + assert.NotEmpty(t, callGraph.Functions) +} + +func TestBuildCallGraph_WithImports(t *testing.T) { + // Test building call graph with imports between modules + + // Create temporary test fixtures + tmpDir := t.TempDir() + utilsDir := filepath.Join(tmpDir, "utils") + err := os.MkdirAll(utilsDir, 0755) + require.NoError(t, err) + + utilsFile := filepath.Join(utilsDir, "helpers.py") + viewsFile := filepath.Join(tmpDir, "views.py") + + utilsCode := []byte(` +def sanitize(data): + return data.strip() +`) + + viewsCode := []byte(` +from utils.helpers import sanitize + +def get_user(): + sanitize(data) +`) + + err = os.WriteFile(utilsFile, utilsCode, 0644) + require.NoError(t, err) + err = os.WriteFile(viewsFile, viewsCode, 0644) + require.NoError(t, err) + + // Build module registry + registry := NewModuleRegistry() + registry.AddModule("utils.helpers", utilsFile) + registry.AddModule("views", viewsFile) + + // Create code graph with both functions + codeGraph := &graph.CodeGraph{ + Nodes: map[string]*graph.Node{ + "node1": { + ID: "node1", + Type: "function_definition", + Name: "get_user", + File: viewsFile, + LineNumber: 4, + }, + "node2": { + ID: "node2", + Type: "function_definition", + Name: "sanitize", + File: utilsFile, + LineNumber: 2, + }, + }, + } + + // Build call graph + callGraph, err := BuildCallGraph(codeGraph, registry, tmpDir) + + require.NoError(t, err) + require.NotNil(t, callGraph) + + // Verify call sites + viewsCallSites := callGraph.CallSites["views.get_user"] + assert.NotEmpty(t, viewsCallSites, "Expected call sites for views.get_user") + + // Verify at least one call was found + if len(viewsCallSites) > 0 { + // Check that the call target was resolved + found := false + for _, cs := range viewsCallSites { + if cs.Target == "sanitize" { + found = true + // Should be resolved to utils.helpers.sanitize + assert.True(t, cs.Resolved, "Call should be resolved") + assert.Equal(t, "utils.helpers.sanitize", cs.TargetFQN) + } + } + assert.True(t, found, "Expected to find call to sanitize") + } + + // Verify edges + callees := callGraph.GetCallees("views.get_user") + assert.Contains(t, callees, "utils.helpers.sanitize", "Expected edge from get_user to sanitize") + + // Verify reverse edges + callers := callGraph.GetCallers("utils.helpers.sanitize") + assert.Contains(t, callers, "views.get_user", "Expected reverse edge from sanitize to get_user") +} + +func TestBuildCallGraph_WithTestFixture(t *testing.T) { + // Integration test with actual test fixtures + + // Use the callsites_test fixture we created in PR #5 + fixturePath := filepath.Join("..", "..", "..", "test-src", "python", "callsites_test") + absFixturePath, err := filepath.Abs(fixturePath) + require.NoError(t, err) + + // Check if fixture exists + if _, err := os.Stat(absFixturePath); os.IsNotExist(err) { + t.Skipf("Fixture directory not found: %s", absFixturePath) + } + + // Build module registry + registry, err := BuildModuleRegistry(absFixturePath) + require.NoError(t, err) + + // For this test, create a minimal code graph + // In real usage, this would come from the main graph building + codeGraph := &graph.CodeGraph{ + Nodes: make(map[string]*graph.Node), + } + + // Scan for Python files and create function nodes + err = filepath.Walk(absFixturePath, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() || filepath.Ext(path) != ".py" { + return nil + } + + modulePath, ok := registry.FileToModule[path] + if !ok { + return nil + } + + // Add some dummy function nodes + // In real scenario these would be parsed from AST + nodeID := "node_" + modulePath + "_process_data" + codeGraph.Nodes[nodeID] = &graph.Node{ + ID: nodeID, + Type: "function_definition", + Name: "process_data", + File: path, + LineNumber: 3, + } + + return nil + }) + require.NoError(t, err) + + // Build call graph + callGraph, err := BuildCallGraph(codeGraph, registry, absFixturePath) + + require.NoError(t, err) + require.NotNil(t, callGraph) + + // Just verify it runs without error + // Detailed validation would require more sophisticated fixtures + assert.NotNil(t, callGraph.Edges) + assert.NotNil(t, callGraph.CallSites) +}