Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
## Summary

> Write this PR in English only. Do not include Chinese/Japanese/Korean characters.

Provide a concise summary of the change and the user-facing impact.

## Motivation
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/pr-language.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,13 @@ jobs:

// Basic CJK detection to block non-English PR metadata.
const cjkPattern = /[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]/;
const cjkPatternGlobal = /[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]/g;

if (cjkPattern.test(text)) {
const offending = [...new Set((text.match(cjkPatternGlobal) || []))].slice(0, 12).join(" ");
core.error(`Detected non-English characters in PR metadata. Offending sample: ${offending || "(unable to extract sample)"}`);
core.error("Please edit the PR title and body and remove CJK characters (Chinese/Japanese/Korean). ");
core.error("Tip: paste your final text into .github/pull_request_template.md sections and keep all content English-only.");
core.setFailed(
"PR title/body must be written in English only. Please remove non-English characters and resubmit."
);
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Use clear, imperative subjects, for example:

## Pull request checklist

- [ ] PR title and PR body contain English text only (no non-English characters)
- [ ] Tests or runnable validation included
- [ ] README/docs updated when needed
- [ ] CHANGELOG entry added
Expand Down
53 changes: 31 additions & 22 deletions hippocortex/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
from __future__ import annotations

import logging
from dataclasses import dataclass

from hippocortex.config import HippoConfig
from hippocortex.consolidation.replay import ReplayConsolidator
from hippocortex.cortex.semantic_store import InMemorySemanticStore, SQLiteSemanticStore
from hippocortex.embedders.base import Embedder
from hippocortex.embedders.dummy_embedder import DummyEmbedder
from hippocortex.hippo.episodic_store import SQLiteEpisodicStore
from hippocortex.router import MemoryRouter
from hippocortex.registry import get_consolidation_strategy, get_router_strategy, get_storage_backend, register_defaults
from hippocortex.types import ConsolidationOutput, ContextPack
from hippocortex.working_memory import WorkingMemory


logger = logging.getLogger(__name__)


class CortexAPI:
def __init__(self, sdk: "HippoCortex") -> None:
self._sdk = sdk
Expand All @@ -28,36 +30,43 @@ class HippoCortex:
embedder: Embedder

def __post_init__(self) -> None:
register_defaults()
self.config.validate()
self.config.ensure_parent_dir()
self.hippo = SQLiteEpisodicStore(self.config.db_path)
semantic_backend = self.config.semantic_store_backend.lower()
if semantic_backend == "memory":
self.semantic_store = InMemorySemanticStore(self.embedder.dimension)
elif semantic_backend == "sqlite":
semantic_db_path = self.config.semantic_store_db_path or self.config.db_path
self.semantic_store = SQLiteSemanticStore(db_path=semantic_db_path, dimension=self.embedder.dimension)
else:
raise ValueError(f"Unknown semantic store backend: {self.config.semantic_store_backend}")

if self.embedder.dimension != self.config.model.embedding_dim:
raise ValueError(
f"Invalid config: embedder dimension {self.embedder.dimension} does not match "
f"model.embedding_dim {self.config.model.embedding_dim}"
)

self.hippo = SQLiteEpisodicStore(self.config.storage.db_path)
semantic_backend = self.config.storage.semantic_store_backend.lower()
self.semantic_store = get_storage_backend(semantic_backend, self.config, self.embedder.dimension)
self.cortex = CortexAPI(self)
self.router = MemoryRouter()
self.working_memory = WorkingMemory(max_recent_turns=self.config.working_memory_turns)
self._consolidator = ReplayConsolidator(replay_size=self.config.replay_episodes)
self.router = get_router_strategy(self.config.router.strategy)
self.working_memory = WorkingMemory(max_recent_turns=self.config.runtime.working_memory_turns)
self._consolidators: dict[str, object] = {}

logger.info("HippoCortex effective config: %s", self.config.as_dict())

@classmethod
def default(cls, config: HippoConfig | None = None, embedder: Embedder | None = None) -> "HippoCortex":
cfg = config or HippoConfig.from_env()
emb = embedder or DummyEmbedder(dimension=cfg.embedding_dim)
cfg = HippoConfig.from_env().merged(config)
emb = embedder or DummyEmbedder(dimension=cfg.model.embedding_dim)
return cls(config=cfg, embedder=emb)

def consolidate(self, agent_id: str, session_id: str | None = None, strategy: str = "replay_v1") -> ConsolidationOutput:
if strategy != "replay_v1":
raise ValueError(f"Unknown consolidation strategy: {strategy}")
episodes = self._consolidator.select_episodes(self.hippo, agent_id=agent_id, session_id=session_id)
return self._consolidator.run(agent_id=agent_id, episodes=episodes, embedder=self.embedder, semantic_store=self.semantic_store)
consolidator = self._consolidators.get(strategy)
if consolidator is None:
consolidator = get_consolidation_strategy(strategy, self.config)
self._consolidators[strategy] = consolidator
episodes = consolidator.select_episodes(self.hippo, agent_id=agent_id, session_id=session_id)
return consolidator.run(agent_id=agent_id, episodes=episodes, embedder=self.embedder, semantic_store=self.semantic_store)

def build_context(self, agent_id: str, session_id: str, user_message: str, max_tokens: int) -> ContextPack:
decision = self.router.route(user_message=user_message, max_tokens=max_tokens)
recent_events = self.hippo.list_events(agent_id=agent_id, session_id=session_id, limit=self.config.working_memory_turns)
recent_events = self.hippo.list_events(agent_id=agent_id, session_id=session_id, limit=self.config.runtime.working_memory_turns)
selected_recent = self.working_memory.select_recent(recent_events, token_budget=decision.working_memory_tokens)

semantic_notes = []
Expand Down
127 changes: 116 additions & 11 deletions hippocortex/config.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,141 @@
from __future__ import annotations

import os
from dataclasses import dataclass
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any


@dataclass(slots=True)
class HippoConfig:
db_path: str = "hippocortex.db"
embedding_dim: int = 128
class RuntimeConfig:
replay_episodes: int = 20
working_memory_turns: int = 12


@dataclass(slots=True)
class StorageConfig:
db_path: str = "hippocortex.db"
semantic_store_backend: str = "memory"
semantic_store_db_path: str | None = None


@dataclass(slots=True)
class ModelConfig:
embedding_dim: int = 128
embedder: str = "dummy"
distill_strategy: str = "auto"


@dataclass(slots=True)
class RouterConfig:
strategy: str = "memory_v1"


@dataclass(slots=True)
class HippoConfig:
runtime: RuntimeConfig = field(default_factory=RuntimeConfig)
storage: StorageConfig = field(default_factory=StorageConfig)
model: ModelConfig = field(default_factory=ModelConfig)
router: RouterConfig = field(default_factory=RouterConfig)

@classmethod
def from_env(cls) -> "HippoConfig":
env = os.getenv("HIPPOCORTEX_ENV", "development").lower()
default_backend = "sqlite" if env == "production" else "memory"
return cls(
db_path=os.getenv("HIPPOCORTEX_DB_PATH", "hippocortex.db"),
embedding_dim=int(os.getenv("HIPPOCORTEX_EMBEDDING_DIM", "128")),
replay_episodes=int(os.getenv("HIPPOCORTEX_REPLAY_EPISODES", "20")),
working_memory_turns=int(os.getenv("HIPPOCORTEX_WORKING_TURNS", "12")),
semantic_store_backend=os.getenv("HIPPOCORTEX_SEMANTIC_STORE_BACKEND", default_backend),
semantic_store_db_path=os.getenv("HIPPOCORTEX_SEMANTIC_STORE_DB_PATH"),
runtime=RuntimeConfig(
replay_episodes=int(os.getenv("HIPPOCORTEX_REPLAY_EPISODES", "20")),
working_memory_turns=int(os.getenv("HIPPOCORTEX_WORKING_TURNS", "12")),
),
storage=StorageConfig(
db_path=os.getenv("HIPPOCORTEX_DB_PATH", "hippocortex.db"),
semantic_store_backend=os.getenv("HIPPOCORTEX_SEMANTIC_STORE_BACKEND", default_backend),
semantic_store_db_path=os.getenv("HIPPOCORTEX_SEMANTIC_STORE_DB_PATH"),
),
model=ModelConfig(
embedding_dim=int(os.getenv("HIPPOCORTEX_EMBEDDING_DIM", "128")),
embedder=os.getenv("HIPPOCORTEX_EMBEDDER", "dummy"),
distill_strategy=os.getenv("HIPPOCORTEX_DISTILL_STRATEGY", "auto"),
),
router=RouterConfig(strategy=os.getenv("HIPPOCORTEX_ROUTER_STRATEGY", "memory_v1")),
)

def merged(self, override: "HippoConfig | None" = None) -> "HippoConfig":
if override is None:
return self
return HippoConfig(
runtime=RuntimeConfig(**{**asdict(self.runtime), **asdict(override.runtime)}),
storage=StorageConfig(**{**asdict(self.storage), **asdict(override.storage)}),
model=ModelConfig(**{**asdict(self.model), **asdict(override.model)}),
router=RouterConfig(**{**asdict(self.router), **asdict(override.router)}),
)

def ensure_parent_dir(self) -> None:
for raw_path in [self.db_path, self.semantic_store_db_path]:
for raw_path in [self.storage.db_path, self.storage.semantic_store_db_path]:
if not raw_path:
continue
path = Path(raw_path)
if path.parent and str(path.parent) != ".":
path.parent.mkdir(parents=True, exist_ok=True)

def validate(self) -> None:
if self.model.embedding_dim <= 0:
raise ValueError("embedding_dim must be positive")
if self.runtime.working_memory_turns <= 0:
raise ValueError("working_memory_turns must be positive")
if self.runtime.replay_episodes <= 0:
raise ValueError("replay_episodes must be positive")
if self.storage.semantic_store_backend.lower() == "sqlite" and self.model.embedding_dim < 4:
raise ValueError("sqlite backend requires embedding_dim >= 4")

def as_dict(self) -> dict[str, Any]:
return asdict(self)

# Backward-compatible flat accessors
@property
def db_path(self) -> str:
return self.storage.db_path

@db_path.setter
def db_path(self, value: str) -> None:
self.storage.db_path = value

@property
def embedding_dim(self) -> int:
return self.model.embedding_dim

@embedding_dim.setter
def embedding_dim(self, value: int) -> None:
self.model.embedding_dim = value

@property
def replay_episodes(self) -> int:
return self.runtime.replay_episodes

@replay_episodes.setter
def replay_episodes(self, value: int) -> None:
self.runtime.replay_episodes = value

@property
def working_memory_turns(self) -> int:
return self.runtime.working_memory_turns

@working_memory_turns.setter
def working_memory_turns(self, value: int) -> None:
self.runtime.working_memory_turns = value

@property
def semantic_store_backend(self) -> str:
return self.storage.semantic_store_backend

@semantic_store_backend.setter
def semantic_store_backend(self, value: str) -> None:
self.storage.semantic_store_backend = value

@property
def semantic_store_db_path(self) -> str | None:
return self.storage.semantic_store_db_path

@semantic_store_db_path.setter
def semantic_store_db_path(self, value: str | None) -> None:
self.storage.semantic_store_db_path = value
11 changes: 7 additions & 4 deletions hippocortex/consolidation/replay.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from __future__ import annotations

from hippocortex.consolidation.distill import distill_episodes
from hippocortex.types import ConsolidationOutput, SemanticNote
from hippocortex.utils.hashing import stable_id


class ReplayConsolidator:
def __init__(self, replay_size: int = 20) -> None:
def __init__(self, replay_size: int = 20, distill_strategy: str = "auto", strategy_name: str = "replay_v1") -> None:
self.replay_size = replay_size
self.distill_strategy = distill_strategy
self.strategy_name = strategy_name

def select_episodes(self, store, agent_id: str, session_id: str | None = None) -> list:
by_importance = store.top_events_by_importance(agent_id=agent_id, session_id=session_id, limit=self.replay_size)
Expand All @@ -16,8 +17,10 @@ def select_episodes(self, store, agent_id: str, session_id: str | None = None) -
return store.list_events(agent_id=agent_id, session_id=session_id, limit=self.replay_size)

def run(self, agent_id: str, episodes: list, embedder, semantic_store) -> ConsolidationOutput:
strategy = "replay_v1"
facts = distill_episodes(episodes)
from hippocortex.registry import get_distill_strategy

strategy = self.strategy_name
facts = get_distill_strategy(self.distill_strategy)(episodes)
episode_ids = [ep.id for ep in episodes if ep.id is not None]
episode_ids_sorted = sorted(episode_ids)
run_basis = f"{agent_id}:{episode_ids_sorted}:{strategy}"
Expand Down
95 changes: 95 additions & 0 deletions hippocortex/registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from __future__ import annotations

from typing import Callable

from hippocortex.config import HippoConfig
from hippocortex.consolidation.distill import distill_episodes, heuristic_distill, llm_distill
from hippocortex.consolidation.replay import ReplayConsolidator
from hippocortex.cortex.semantic_store import InMemorySemanticStore, SQLiteSemanticStore
from hippocortex.router import MemoryRouter

# Factory/strategy signatures for the plugin registries below.
RouterFactory = Callable[[], object]  # no-arg constructor for a router instance
DistillStrategy = Callable[[list], list[str]]  # episodes -> distilled fact strings
StorageFactory = Callable[[HippoConfig, int], object]  # (config, embedding dim) -> semantic store
ConsolidationFactory = Callable[[HippoConfig], object]  # config -> consolidator instance

# Module-level registries mapping strategy/backend names to their factories.
# Populated by register_defaults() and by user calls to the register_* helpers.
_ROUTER_REGISTRY: dict[str, RouterFactory] = {}
_DISTILL_REGISTRY: dict[str, DistillStrategy] = {}
_STORAGE_REGISTRY: dict[str, StorageFactory] = {}
_CONSOLIDATION_REGISTRY: dict[str, ConsolidationFactory] = {}


def register_router_strategy(name: str, factory: RouterFactory) -> None:
    """Register *factory* under *name*; a later call with the same name overrides it."""
    _ROUTER_REGISTRY.update({name: factory})


def get_router_strategy(name: str):
    """Instantiate and return the router strategy registered under *name*.

    Raises:
        ValueError: if no router strategy is registered under *name*.
    """
    try:
        factory = _ROUTER_REGISTRY[name]
    except KeyError as exc:
        raise ValueError(f"Unknown router strategy: {name}") from exc
    # Invoke the factory outside the try: a KeyError raised *inside* the
    # factory must propagate as-is, not be misreported as an unknown name.
    return factory()


def register_distill_strategy(name: str, strategy: DistillStrategy) -> None:
    """Register *strategy* under *name*; a later call with the same name overrides it."""
    _DISTILL_REGISTRY.update({name: strategy})


def get_distill_strategy(name: str) -> DistillStrategy:
    """Return the distill callable registered under *name*.

    Raises:
        ValueError: if no distill strategy is registered under *name*.
    """
    try:
        strategy = _DISTILL_REGISTRY[name]
    except KeyError as missing:
        raise ValueError(f"Unknown distill strategy: {name}") from missing
    return strategy


def register_storage_backend(name: str, factory: StorageFactory) -> None:
    """Register *factory* under *name*; a later call with the same name overrides it."""
    _STORAGE_REGISTRY.update({name: factory})


def get_storage_backend(name: str, config: HippoConfig, dimension: int):
    """Build and return the semantic store backend registered under *name*.

    Args:
        name: Registered backend name (e.g. "memory", "sqlite").
        config: Full configuration passed through to the factory.
        dimension: Embedding dimension the store must accommodate.

    Raises:
        ValueError: if no storage backend is registered under *name*.
    """
    try:
        factory = _STORAGE_REGISTRY[name]
    except KeyError as exc:
        raise ValueError(f"Unknown semantic store backend: {name}") from exc
    # Call the factory outside the try: a KeyError raised inside the factory
    # (e.g. a missing config key) must not be masked as an unknown backend.
    return factory(config, dimension)


def register_consolidation_strategy(name: str, factory: ConsolidationFactory) -> None:
    """Register *factory* under *name*; a later call with the same name overrides it."""
    _CONSOLIDATION_REGISTRY.update({name: factory})


def get_consolidation_strategy(name: str, config: HippoConfig):
    """Build and return the consolidation strategy registered under *name*.

    Raises:
        ValueError: if no consolidation strategy is registered under *name*.
    """
    try:
        factory = _CONSOLIDATION_REGISTRY[name]
    except KeyError as exc:
        raise ValueError(f"Unknown consolidation strategy: {name}") from exc
    # Call the factory outside the try: a KeyError raised inside the factory
    # must propagate unchanged instead of being reported as an unknown name.
    return factory(config)


def _sqlite_factory(config: HippoConfig, dimension: int):
    """Build a SQLite-backed semantic store, falling back to the episodic DB path."""
    storage = config.storage
    db_path = storage.semantic_store_db_path if storage.semantic_store_db_path else storage.db_path
    return SQLiteSemanticStore(db_path=db_path, dimension=dimension)


def _memory_factory(config: HippoConfig, dimension: int):
    """Build an in-memory semantic store of the given embedding dimension."""
    del config  # unused; parameter kept to satisfy the StorageFactory signature
    return InMemorySemanticStore(dimension)


def _replay_factory(config: HippoConfig):
    """Build the default replay consolidator from the runtime/model config."""
    options = {
        "replay_size": config.runtime.replay_episodes,
        "distill_strategy": config.model.distill_strategy,
        "strategy_name": "replay_v1",
    }
    return ReplayConsolidator(**options)


def register_defaults() -> None:
    """Populate all four registries with the built-in strategies.

    Idempotent and non-clobbering: each default is installed with
    ``setdefault`` so entries registered by callers beforehand are never
    overwritten. The previous early-return guard checked only
    ``_ROUTER_REGISTRY``, so registering a custom router before calling
    this function silently skipped the distill/storage/consolidation
    defaults; per-entry ``setdefault`` fixes that.
    """
    _ROUTER_REGISTRY.setdefault("memory_v1", MemoryRouter)

    _DISTILL_REGISTRY.setdefault("auto", distill_episodes)
    _DISTILL_REGISTRY.setdefault("heuristic", heuristic_distill)
    _DISTILL_REGISTRY.setdefault("llm", llm_distill)

    _STORAGE_REGISTRY.setdefault("memory", _memory_factory)
    _STORAGE_REGISTRY.setdefault("sqlite", _sqlite_factory)

    _CONSOLIDATION_REGISTRY.setdefault("replay_v1", _replay_factory)
Loading
Loading