-
Notifications
You must be signed in to change notification settings - Fork 5
fix: improve error handling for gadget goroutine failures #84
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,9 +17,9 @@ package gadget | |
| import ( | ||
| "context" | ||
| "fmt" | ||
| "log/slog" | ||
|
|
||
| gadgetcontext "github.com/inspektor-gadget/inspektor-gadget/pkg/gadget-context" | ||
| "golang.org/x/sync/errgroup" | ||
| ) | ||
|
|
||
| // RuntimeManager defines the interface for running gadgets | ||
|
|
@@ -61,24 +61,43 @@ func (r *Registry) Register(name string, config *GadgetConfig) { | |
| r.gadgets[name] = config | ||
| } | ||
|
|
||
| // RunAll starts all registered gadgets | ||
| func (r *Registry) RunAll(ctx context.Context) error { | ||
| // RunAll starts all registered gadgets and returns an errgroup that the caller | ||
| // can Wait() on. If any gadget fails, the errgroup's context is canceled, | ||
| // signaling other gadgets to stop. | ||
| func (r *Registry) RunAll(ctx context.Context) (*errgroup.Group, error) { | ||
| g, gCtx := errgroup.WithContext(ctx) | ||
|
|
||
| // Pre-pass: create all gadget contexts before starting any goroutines. | ||
| // This ensures that if CreateContext fails, no goroutines have been started. | ||
| type gadgetEntry struct { | ||
| name string | ||
| config *GadgetConfig | ||
| gadgetCtx *gadgetcontext.GadgetContext | ||
| } | ||
|
|
||
| entries := make([]gadgetEntry, 0, len(r.gadgets)) | ||
| for name, config := range r.gadgets { | ||
| contextManager := r.defaultContextManager | ||
| if config.Context != nil { | ||
| contextManager = config.Context | ||
| } | ||
|
|
||
| gadgetCtx, err := contextManager.CreateContext(ctx, config.Bytes, config.ImageName) | ||
| gadgetCtx, err := contextManager.CreateContext(gCtx, config.Bytes, config.ImageName) | ||
| if err != nil { | ||
| return fmt.Errorf("creating context for gadget %s: %w", name, err) | ||
| return nil, fmt.Errorf("creating context for gadget %s: %w", name, err) | ||
| } | ||
Sefi4 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| go func(name string, config *GadgetConfig, gadgetCtx *gadgetcontext.GadgetContext) { | ||
| if err := r.runtimeManager.RunGadget(gadgetCtx, config.Params); err != nil { | ||
| slog.Error("Error running gadget", "name", name, "error", err) | ||
| entries = append(entries, gadgetEntry{name: name, config: config, gadgetCtx: gadgetCtx}) | ||
| } | ||
|
|
||
| for _, e := range entries { | ||
| g.Go(func() error { | ||
| if err := r.runtimeManager.RunGadget(e.gadgetCtx, e.config.Params); err != nil { | ||
| return fmt.Errorf("running gadget %s: %w", e.name, err) | ||
| } | ||
| }(name, config, gadgetCtx) | ||
| return nil | ||
| }) | ||
|
Comment on lines
+93
to
+99
|
||
| } | ||
| return nil | ||
|
|
||
| return g, nil | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,9 +16,9 @@ package gadget | |
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
| "sync" | ||
| "testing" | ||
| "time" | ||
|
|
||
| gadgetcontext "github.com/inspektor-gadget/inspektor-gadget/pkg/gadget-context" | ||
| ) | ||
|
|
@@ -80,14 +80,87 @@ func TestRegistry_RunAll(t *testing.T) { | |
| Params: map[string]string{"foo": "bar"}, | ||
| }) | ||
|
|
||
| if err := r.RunAll(context.Background()); err != nil { | ||
| g, err := r.RunAll(context.Background()) | ||
| if err != nil { | ||
| t.Fatalf("RunAll failed: %v", err) | ||
| } | ||
|
|
||
| if err := g.Wait(); err != nil { | ||
| t.Fatalf("Wait returned error: %v", err) | ||
| } | ||
|
|
||
| select { | ||
| case <-done: | ||
| // success | ||
| case <-time.After(2 * time.Millisecond): | ||
| t.Fatal("timeout waiting for RunGadget") | ||
| default: | ||
| t.Fatal("RunGadget was never called") | ||
| } | ||
| } | ||
|
|
||
| func TestRegistry_RunAll_ErrorPropagation(t *testing.T) { | ||
| mockRuntime := &mockRuntimeManager{ | ||
| runGadgetFunc: func(gadgetCtx *gadgetcontext.GadgetContext, params map[string]string) error { | ||
| return fmt.Errorf("gadget startup failed") | ||
| }, | ||
| } | ||
|
|
||
| mockContext := &mockContextCreator{} | ||
| r := NewRegistry(mockContext, mockRuntime) | ||
|
|
||
| r.Register("failing-gadget", &GadgetConfig{ | ||
| ImageName: "test-image", | ||
| Params: map[string]string{}, | ||
| }) | ||
|
|
||
| g, err := r.RunAll(context.Background()) | ||
| if err != nil { | ||
| t.Fatalf("RunAll failed: %v", err) | ||
| } | ||
|
|
||
| err = g.Wait() | ||
| if err == nil { | ||
| t.Fatal("expected error from Wait, got nil") | ||
| } | ||
|
|
||
| expected := "running gadget failing-gadget: gadget startup failed" | ||
| if err.Error() != expected { | ||
| t.Errorf("expected error %q, got %q", expected, err.Error()) | ||
| } | ||
|
Comment on lines
+125
to
+128
|
||
| } | ||
|
|
||
| func TestRegistry_RunAll_ContextCancellation(t *testing.T) { | ||
| mockRuntime := &mockRuntimeManager{ | ||
| runGadgetFunc: func(gadgetCtx *gadgetcontext.GadgetContext, params map[string]string) error { | ||
| // Simulate a long-running gadget that respects context cancellation | ||
| <-gadgetCtx.Context().Done() | ||
| return gadgetCtx.Context().Err() | ||
| }, | ||
|
Comment on lines
+131
to
+137
|
||
| } | ||
|
|
||
| mockContext := &mockContextCreator{} | ||
| r := NewRegistry(mockContext, mockRuntime) | ||
|
|
||
| r.Register("long-running", &GadgetConfig{ | ||
| ImageName: "test-image", | ||
| Params: map[string]string{}, | ||
| }) | ||
|
|
||
| r.Register("another", &GadgetConfig{ | ||
| ImageName: "test-image", | ||
| Params: map[string]string{}, | ||
| }) | ||
|
|
||
| ctx, cancel := context.WithCancel(context.Background()) | ||
| g, err := r.RunAll(ctx) | ||
| if err != nil { | ||
| t.Fatalf("RunAll failed: %v", err) | ||
| } | ||
|
|
||
| // Cancel the context to simulate shutdown signal | ||
| cancel() | ||
|
|
||
| err = g.Wait() | ||
| if err == nil { | ||
| t.Fatal("expected error from Wait after cancellation, got nil") | ||
| } | ||
| } | ||
Sefi4 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Uh oh!
There was an error while loading. Please reload this page.