-
Notifications
You must be signed in to change notification settings - Fork 4
fix(penpal): prevent agent restart loop and UI vibration #373
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,23 +30,34 @@ type Agent struct { | |
| numTurns int // number of assistant turns | ||
| } | ||
|
|
||
| // quickExitThreshold is the minimum run time to consider an agent exit | ||
| // "normal". Exits faster than this are tracked as quick exits to prevent | ||
| // rapid restart loops. | ||
| const quickExitThreshold = 15 * time.Second | ||
|
|
||
| // restartCooldown is the minimum time to wait after a quick exit before | ||
| // allowing another agent start for the same project. | ||
| const restartCooldown = 30 * time.Second | ||
|
|
||
| // Manager manages Claude Code agent processes, one per project. | ||
| type Manager struct { | ||
| mu sync.Mutex | ||
| agents map[string]*Agent // key: qualified project name | ||
| cache *cache.Cache | ||
| comments *comments.Store | ||
| port int | ||
| onChange func(projectName string) // called when agent starts or stops | ||
| claudeBin func() string // returns resolved path to claude binary | ||
| mu sync.Mutex | ||
| agents map[string]*Agent // key: qualified project name | ||
| lastQuickExit map[string]time.Time // key: project name -> time of last quick exit | ||
| cache *cache.Cache | ||
| comments *comments.Store | ||
| port int | ||
| onChange func(projectName string) // called when agent starts or stops | ||
| claudeBin func() string // returns resolved path to claude binary | ||
| } | ||
|
|
||
| func New(c *cache.Cache, cs *comments.Store, port int) *Manager { | ||
| return &Manager{ | ||
| agents: make(map[string]*Agent), | ||
| cache: c, | ||
| comments: cs, | ||
| port: port, | ||
| agents: make(map[string]*Agent), | ||
| lastQuickExit: make(map[string]time.Time), | ||
| cache: c, | ||
| comments: cs, | ||
| port: port, | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -65,7 +76,8 @@ func (m *Manager) SetOnChange(fn func(projectName string)) { | |
| } | ||
|
|
||
| // Start launches a Claude agent for the given project. | ||
| // Returns nil if an agent is already running for this project. | ||
| // Returns nil if an agent is already running for this project or if the | ||
| // project is in a restart cooldown (last agent exited quickly). | ||
| func (m *Manager) Start(projectName string) (*Agent, error) { | ||
| m.mu.Lock() | ||
| defer m.mu.Unlock() | ||
|
|
@@ -80,6 +92,13 @@ func (m *Manager) Start(projectName string) (*Agent, error) { | |
| } | ||
| } | ||
|
|
||
| // Prevent rapid restart loops: if the last agent for this project | ||
| // exited quickly (crashed), wait before allowing another start. | ||
| if t, ok := m.lastQuickExit[projectName]; ok && time.Since(t) < restartCooldown { | ||
| log.Printf("Agent start for %s blocked: cooldown after quick exit (%s ago)", projectName, time.Since(t).Round(time.Second)) | ||
| return nil, nil | ||
| } | ||
|
|
||
| proj := m.cache.FindProject(projectName) | ||
| if proj == nil { | ||
| return nil, fmt.Errorf("project %q not found", projectName) | ||
|
|
@@ -161,7 +180,17 @@ func (m *Manager) Start(projectName string) (*Agent, error) { | |
| logFile.Close() | ||
| os.Remove(mcpConfigPath) | ||
| close(agent.done) | ||
| log.Printf("Agent exited for %s (PID %d): %v", projectName, agent.PID, agent.exitErr) | ||
|
|
||
| runDuration := time.Since(agent.StartedAt) | ||
| log.Printf("Agent exited for %s (PID %d, ran %s): %v", projectName, agent.PID, runDuration.Round(time.Second), agent.exitErr) | ||
|
|
||
| // Track quick exits to prevent rapid restart loops | ||
| if runDuration < quickExitThreshold { | ||
| m.mu.Lock() | ||
| m.lastQuickExit[projectName] = time.Now() | ||
| m.mu.Unlock() | ||
|
Comment on lines
+188
to
+191
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This branch marks every short-lived run as a "quick exit" without checking why the process ended, so a user-initiated stop (or any normal short run) within 15s still sets cooldown and blocks restart attempts for 30s. In practice, clicking Stop shortly after startup can prevent immediate restart even though nothing crashed; cooldown tracking should be limited to crash-like exits instead of all short durations. Useful? React with 👍 / 👎. |
||
| log.Printf("Agent for %s exited quickly (%s) — restart cooldown active for %s", projectName, runDuration.Round(time.Millisecond), restartCooldown) | ||
| } | ||
|
|
||
| m.comments.ClearProjectHeartbeats(projectName) | ||
| m.comments.ClearProjectWorking(projectName) | ||
|
|
@@ -219,6 +248,7 @@ type AgentStatus struct { | |
| PID int `json:"pid"` | ||
| StartedAt time.Time `json:"startedAt"` | ||
| Running bool `json:"running"` | ||
| Cooldown bool `json:"cooldown,omitempty"` // true when agent recently crashed and restarts are blocked | ||
| ContextWindow int `json:"contextWindow"` | ||
| ContextUsed int `json:"contextUsed"` | ||
| ContextPercent float64 `json:"contextPercent"` | ||
|
|
@@ -233,6 +263,13 @@ func (m *Manager) Status(projectName string) *AgentStatus { | |
|
|
||
| agent, ok := m.agents[projectName] | ||
| if !ok { | ||
| // No agent exists — check if we're in cooldown | ||
| if t, ok := m.lastQuickExit[projectName]; ok && time.Since(t) < restartCooldown { | ||
| return &AgentStatus{ | ||
| Project: projectName, | ||
| Cooldown: true, | ||
| } | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
|
|
@@ -284,6 +321,15 @@ func (m *Manager) SimulateFinished(projectName string) { | |
| } | ||
| } | ||
|
|
||
| // SetQuickExit records a quick exit for the given project, activating the | ||
| // restart cooldown. This is intended for testing the cooldown behavior | ||
| // without requiring an actual agent process. | ||
| func (m *Manager) SetQuickExit(projectName string) { | ||
| m.mu.Lock() | ||
| defer m.mu.Unlock() | ||
| m.lastQuickExit[projectName] = time.Now() | ||
| } | ||
|
|
||
| // StopAll terminates all running agents (for server shutdown). | ||
| func (m *Manager) StopAll() { | ||
| m.mu.Lock() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The 30s auto-start throttle uses a single timestamp for the whole FilePage instance, so an auto-start in one project suppresses auto-start in another project visited shortly after. When users switch files/projects quickly,
needsAgent=truefor project B can be skipped by this guard even though B has never attempted a start; the throttle state needs to be keyed by project (or reset on project change).Useful? React with 👍 / 👎.