Skip to content
This repository was archived by the owner on Apr 3, 2026. It is now read-only.

Commit d98c14a

Browse files
committed
Merge branch 'feature/hyperhealth-service'
2 parents 8850335 + 25a76d5 commit d98c14a

28 files changed

Lines changed: 2458 additions & 1148 deletions

README_INFRASTRUCTURE_UPGRADE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,4 +531,4 @@ Before starting implementation:
531531

532532
*This infrastructure upgrade transforms Hypercode from manual deployments to enterprise-grade zero-downtime deployments with 99.99% SLA, 9x faster MTTR, and comprehensive observability—all within 12 weeks.*
533533

534-
**Let's get started! 🚀**
534+
*Let's get started! 🚀*

agents/healer/main.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# ✅ STDLIB — complete clean import block
2-
from .mape_k_bootstrap import start_mape_k
32
import asyncio
43
import json
54
import logging
65
import os
6+
import sys
77
import time
88
from collections import defaultdict
99
from contextlib import asynccontextmanager
@@ -12,13 +12,16 @@
1212
from typing import Any, Callable, Coroutine, Dict, List, Optional
1313
from urllib.parse import urlparse
1414

15+
# 📍 Ensure /app/healer is on sys.path for absolute imports of mape_k_* modules
16+
sys.path.insert(0, os.path.dirname(__file__))
17+
1518
# Third party
1619
import httpx
1720
import redis.asyncio as redis
1821
from fastapi import FastAPI, HTTPException
1922
from pydantic import BaseModel
2023

21-
# Local
24+
# Local (relative imports for files in the same package)
2225
from .adapters.docker_adapter import DockerAdapter
2326
from .metrics import init_metrics
2427
from .models import HealResult, HealerException, HealRequest
@@ -27,12 +30,12 @@
2730
from agents.shared.agent_message import HealEvent, AgentMessage
2831
from agents.shared.event_bus import AgentEventBus
2932

30-
# ── MAPE-K router pre-registration (MUST happen before app starts) ─────────────────────
31-
from .mape_k_api import router as mape_k_router
32-
from .mape_k_engine import KnowledgeBase, mape_k_loop, DEFAULT_SERVICES
33-
from .mape_k_api import set_knowledge_base
33+
# ── MAPE-K imports (absolute — resolved via sys.path above) ────────────────────────────────────
34+
from mape_k_api import router as mape_k_router
35+
from mape_k_engine import KnowledgeBase, mape_k_loop, DEFAULT_SERVICES
36+
from mape_k_api import set_knowledge_base
3437

35-
# ── Logging ──────────────────────────────────────────────────────────────────────────────
38+
# ── Logging ────────────────────────────────────────────────────────────────────────────────────
3639
class JSONFormatter(logging.Formatter):
3740
def format(self, record):
3841
log_data = {
@@ -55,7 +58,7 @@ def format(self, record):
5558
logger.addHandler(handler)
5659
logger.setLevel(logging.INFO)
5760

58-
# ── Config ──────────────────────────────────────────────────────────────────────────────
61+
# ── Config ────────────────────────────────────────────────────────────────────────────────────
5962
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379")
6063
ORCHESTRATOR_URL = os.getenv("ORCHESTRATOR_URL", "http://crew-orchestrator:8080")
6164
WATCHDOG_ENABLED = os.getenv("HEALER_WATCHDOG_ENABLED", "false").strip().lower() == "true"
@@ -66,22 +69,22 @@ def format(self, record):
6669
WATCHDOG_FORCE_RESTART = os.getenv("HEALER_WATCHDOG_FORCE_RESTART", "false").strip().lower() == "true"
6770
HEALER_AGENT_ID = os.getenv("HEALER_AGENT_ID", "healer-01")
6871

69-
# ── Global state ───────────────────────────────────────────────────────────────────────
72+
# ── Global state ───────────────────────────────────────────────────────────────────────────
7073
redis_client: Optional[redis.Redis] = None
7174
docker_adapter: Optional[DockerAdapter] = None
7275
_throttle_paused_local: Dict[str, float] = {}
7376
event_bus: Optional[AgentEventBus] = None # │ Phase 3 │
7477

7578

76-
# ── Models ──────────────────────────────────────────────────────────────────────────────
79+
# ── Models ────────────────────────────────────────────────────────────────────────────────────
7780
class ThrottleStateUpdate(BaseModel):
7881
containers: List[str]
7982
paused: bool = True
8083
ttl_seconds: int = 900
8184
reason: Optional[str] = None
8285

8386

84-
# ── Throttle helpers ──────────────────────────────────────────────────────────
87+
# ── Throttle helpers ──────────────────────────────────────────────────────────────────────────────
8588
def _throttle_pause_key(container: str) -> str:
8689
return f"throttle:paused:{container}"
8790

@@ -147,7 +150,7 @@ async def get_throttle_state() -> Dict[str, Any]:
147150
return {"containers": sorted(set(containers)), "backend": "memory"}
148151

149152

150-
# ── Circuit Breaker ───────────────────────────────────────────────────────────────────────
153+
# ── Circuit Breaker ───────────────────────────────────────────────────────────────────────────────────
151154
class CircuitState(Enum):
152155
CLOSED = "closed"
153156
OPEN = "open"
@@ -198,7 +201,7 @@ def on_failure(self):
198201
circuit_breakers: Dict[str, CircuitBreaker] = defaultdict(CircuitBreaker)
199202

200203

201-
# ── Lifespan ──────────────────────────────────────────────────────────────────────────────
204+
# ── Lifespan ────────────────────────────────────────────────────────────────────────────────────
202205
@asynccontextmanager
203206
async def lifespan(app: FastAPI):
204207
global redis_client, docker_adapter, event_bus
@@ -214,7 +217,7 @@ async def lifespan(app: FastAPI):
214217
logger.warning(f"⚠️ EventBus failed to connect (non-fatal): {e}")
215218
event_bus = None
216219

217-
# 🧠 Start MAPE-K background loop (router already registered at module load)
220+
# 🧠 Start MAPE-K background loop
218221
kb = KnowledgeBase()
219222
set_knowledge_base(kb)
220223
asyncio.create_task(
@@ -238,7 +241,7 @@ async def lifespan(app: FastAPI):
238241
logger.info("Healer Agent shutting down")
239242

240243

241-
# ── App ─────────────────────────────────────────────────────────────────────────────────
244+
# ── App ──────────────────────────────────────────────────────────────────────────────────────
242245
app = FastAPI(
243246
title="Healer Agent",
244247
version="0.3.0",
@@ -247,12 +250,12 @@ async def lifespan(app: FastAPI):
247250
)
248251
init_metrics(app)
249252

250-
# 🧠 MAPE-K routes registered HERE — before the app starts serving!
253+
# 🧠 MAPE-K routes registered before app starts serving
251254
app.include_router(mape_k_router)
252255
logger.info("✅ MAPE-K API routes registered at /mape-k/*")
253256

254257

255-
# ── Phase 3: Event bus publish helper ───────────────────────────────────────────────
258+
# ── Phase 3: Event bus publish helper ───────────────────────────────────────────────────────────
256259
async def _publish_heal_event(
257260
healed_agent: str,
258261
heal_pattern: str,
@@ -263,7 +266,7 @@ async def _publish_heal_event(
263266
if not event_bus:
264267
return
265268
try:
266-
xp = 50 if status == "recovered" else 5
269+
xp = 50 if status == "recovered" else 5
267270
coins = 10.0 if status == "recovered" else 1.0
268271
event = HealEvent(
269272
agent_id=HEALER_AGENT_ID,
@@ -282,7 +285,7 @@ async def _publish_heal_event(
282285
logger.warning(f"EventBus publish failed (non-fatal): {e}")
283286

284287

285-
# ── System helpers ──────────────────────────────────────────────────────────────────────
288+
# ── System helpers ────────────────────────────────────────────────────────────────────────────────
286289
async def fetch_system_health() -> Dict[str, Dict[Any, Any]]:
287290
try:
288291
async with httpx.AsyncClient(timeout=10.0) as client:
@@ -324,7 +327,7 @@ async def fetch_agent_roster() -> Dict[str, str]:
324327
return {}
325328

326329

327-
# ── Watchdog ──────────────────────────────────────────────────────────────────────────────
330+
# ── Watchdog ──────────────────────────────────────────────────────────────────────────────────────
328331
async def watchdog_cycle() -> None:
329332
if not WATCHDOG_SMOKE_API_KEY:
330333
return
@@ -384,7 +387,7 @@ async def watchdog_loop() -> None:
384387
await asyncio.sleep(max(WATCHDOG_INTERVAL_SECONDS, 5.0))
385388

386389

387-
# ── Healing logic ──────────────────────────────────────────────────────────────────────
390+
# ── Healing logic ──────────────────────────────────────────────────────────────────────────────────
388391
async def ping_agent_health(agent_url: str, timeout: float) -> bool:
389392
try:
390393
async with httpx.AsyncClient(timeout=timeout) as client:
@@ -511,7 +514,7 @@ async def alert_listener():
511514
logger.warning("Alert listener stopped")
512515

513516

514-
# ── Routes ──────────────────────────────────────────────────────────────────────────────
517+
# ── Routes ──────────────────────────────────────────────────────────────────────────────────────
515518
@app.get("/health")
516519
async def health():
517520
docker_ok = False

agents/healer/main.py.backup

Lines changed: 0 additions & 162 deletions
This file was deleted.

0 commit comments

Comments
 (0)