Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 31 additions & 29 deletions src/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,60 +5,62 @@
namespace py = pybind11;

PYBIND11_MODULE(SimpleHNSW, m) {
m.doc() = "SimpleHNSW - A simple HNSW (Hierarchical Navigable Small World) implementation for approximate nearest neighbor search";
m.doc() = "SimpleHNSW - A simple HNSW implementation for approximate nearest neighbor search";

py::class_<SimpleHNSWIndex>(m, "SimpleHNSWIndex")
.def(py::init<int, double, int, int>(),
py::arg("L") = 5,
py::arg("mL") = 0.62,
py::arg("efc") = 10,
py::arg("maxConnections") = 16,
R"doc(
.def(py::init<int, double, int, int, unsigned int>(),
py::arg("L") = 5,
py::arg("mL") = 0.62,
py::arg("efc") = 10,
py::arg("maxConnections") = 16,
py::arg("seed") = 0u,
R"doc(
Initialize a SimpleHNSW index.

Args:
L (int): Number of layers in the hierarchical graph (default: 5)
L (int): Number of layers (default: 5)
mL (float): Normalization factor for layer assignment (default: 0.62)
efc (int): Size of the dynamic candidate list during construction (default: 10)
maxConnections (int): Maximum number of connections per node (default: 16)
)doc")
seed (int): RNG seed (0 => non-deterministic)
)doc")
.def("insert", &SimpleHNSWIndex::insert,
py::arg("vector"),
R"doc(
py::arg("vector"),
R"doc(
Insert a vector into the index.

Args:
vector (list[float]): The vector to insert
)doc")
)doc")
.def("search", &SimpleHNSWIndex::search,
py::arg("query"),
py::arg("ef") = 1,
R"doc(
py::arg("query"),
py::arg("ef") = 1,
R"doc(
Search for the nearest neighbors of a query vector.

Args:
query (list[float]): The query vector
ef (int): Size of the dynamic candidate list during search (default: 1)

Returns:
list[tuple[float, int]]: List of (distance, index) pairs for nearest neighbors
)doc")
)doc")
.def("toJSON", &SimpleHNSWIndex::toJSON,
R"doc(
R"doc(
Serialize the index to a JSON string.

Returns:
str: JSON representation of the index
)doc")
)doc")
.def_static("fromJSON", &SimpleHNSWIndex::fromJSON,
py::arg("json"),
R"doc(
py::arg("json"),
R"doc(
Deserialize an index from a JSON string.

Args:
json (str): JSON representation of the index

Returns:
SimpleHNSWIndex: Deserialized index
)doc");
)doc");
}
16 changes: 5 additions & 11 deletions src/cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,16 @@
#include <chrono>
#include "lru_cache.h"

// Provide a thread-safe initialization via function-local static
class Cache {
private:
static std::unique_ptr<LRUCache<std::string, std::vector<std::any>>> instance;

Cache() {}

public:
// Return a reference to a process-wide cache instance.
// Function-local static ensures thread-safe initialization (C++11+).
static LRUCache<std::string, std::vector<std::any>>& getInstance(size_t max = 10000, std::chrono::milliseconds maxAge = std::chrono::milliseconds(1000 * 60 * 10)) {
if (!instance) {
instance = std::make_unique<LRUCache<std::string, std::vector<std::any>>>(max, maxAge);
}
return *instance;
static LRUCache<std::string, std::vector<std::any>> instance(max, maxAge);
return instance;
}
};

// Initialize the static member
std::unique_ptr<LRUCache<std::string, std::vector<std::any>>> Cache::instance = nullptr;


138 changes: 73 additions & 65 deletions src/lru_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,91 +3,99 @@

#include <unordered_map>
#include <list>
#include <stdexcept>
#include <chrono>
#include <memory>
#include <vector>
#include <string>
#include <any>
#include <mutex>
#include <optional>
#include <stdexcept>
#include <utility>
#include <algorithm>

// LRUCache template class
template<typename Key, typename Value>
class LRUCache {
private:
using Timestamp = std::chrono::steady_clock::time_point;

struct CacheItem {
Value value;
Timestamp timestamp;
};

std::list<std::pair<Key, CacheItem>> itemList;
std::unordered_map<Key, typename decltype(itemList)::iterator> itemMap;
size_t maxSize;
std::chrono::milliseconds maxAge;

void moveToFront(typename decltype(itemList)::iterator it) {
itemList.splice(itemList.begin(), itemList, it);
}

void evict() {
while (itemList.size() > maxSize || (maxAge.count() > 0 && !itemList.empty() &&
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - itemList.back().second.timestamp).count() > maxAge.count())) {
itemMap.erase(itemList.back().first);
itemList.pop_back();
}
}

public:
using Clock = std::chrono::steady_clock;
using Timestamp = Clock::time_point;

LRUCache(size_t maxSize, std::chrono::milliseconds maxAge = std::chrono::milliseconds(0))
: maxSize(maxSize), maxAge(maxAge) {}
: maxSize_(std::max<size_t>(1, maxSize)), maxAge_(maxAge) {}

// Put (copy)
void put(const Key& key, const Value& value) {
auto now = std::chrono::steady_clock::now();
auto it = itemMap.find(key);
if (it != itemMap.end()) {
it->second->second.value = value;
it->second->second.timestamp = now;
moveToFront(it->second);
} else {
itemList.push_front({ key, { value, now } });
itemMap[key] = itemList.begin();
}
evict();
put_impl(key, value);
}

Value get(const Key& key) {
auto it = itemMap.find(key);
if (it == itemMap.end()) {
throw std::runtime_error("Key not found");
}
moveToFront(it->second);
// Put (move): overload taking both arguments as rvalues. They are handed to
// put_impl still as rvalues, so put_impl's std::forward can move from them
// instead of copying.
void put(Key&& key, Value&& value) {
    put_impl<Key, Value>(std::move(key), std::move(value));
}

// Get: look up `key` while holding the internal mutex.
//
// Returns std::nullopt when the key is absent (no exception is thrown for a
// miss). On a hit the entry is spliced to the front of the recency list, its
// timestamp is reset to Clock::now(), and a copy of the stored value is
// returned.
//
// NOTE(review): this method does not compare the entry's age with maxAge_;
// age-based removal is done only by evict_if_needed().
std::optional<Value> get(const Key& key) {
    std::lock_guard<std::mutex> guard(mutex_);
    const auto found = map_.find(key);
    if (found != map_.end()) {
        const auto node = found->second;
        // Mark as most recently used, then refresh its timestamp.
        item_list_.splice(item_list_.begin(), item_list_, node);
        node->second.timestamp = Clock::now();
        return node->second.value;
    }
    return std::nullopt;
}

bool contains(const Key& key) const {
return itemMap.find(key) != itemMap.end();
std::lock_guard<std::mutex> lock(mutex_);
return map_.find(key) != map_.end();
}

size_t size() const {
return itemMap.size();
std::lock_guard<std::mutex> lock(mutex_);
return map_.size();
}

// Remove every entry from the cache. Holds the internal mutex while both the
// recency list and the key -> node lookup table are emptied.
void clear() {
    const std::lock_guard<std::mutex> guard(mutex_);
    item_list_.clear();
    map_.clear();
}
};

// Cache singleton class
// class Cache {
// private:
// static std::unique_ptr<LRUCache<std::string, std::vector<std::any>>> instance;
private:
struct CacheItem {
Value value;
Timestamp timestamp;
};

// Cache() {}
using ListIt = typename std::list<std::pair<Key, CacheItem>>::iterator;

// public:
// static LRUCache<std::string, std::vector<std::any>>& getInstance(size_t max = 10000, std::chrono::milliseconds maxAge = std::chrono::milliseconds(1000 * 60 * 10)) {
// if (!instance) {
// instance = std::make_unique<LRUCache<std::string, std::vector<std::any>>>(max, maxAge);
// }
// return *instance;
// }
// };
// Shared implementation behind both put() overloads.
//
// Holds the internal mutex for the whole operation. If `key` is already
// present its value is overwritten; otherwise a new entry is created. Either
// way the entry ends up at the front of the recency list with its timestamp
// set to the time sampled at the start of the call.
//
// K and V are forwarding references: lvalue arguments are copied into the
// cache, rvalue arguments are moved.
//
// Eviction runs after BOTH the update and the insert path, so entries older
// than maxAge_ are purged even when the cache only ever sees updates to
// existing keys.
//
// Exception safety: if registering a new node in the lookup table throws, the
// node is removed from the list again, so the list and the table never
// disagree about which keys are stored.
template<typename K, typename V>
void put_impl(K&& key, V&& value) {
    std::lock_guard<std::mutex> lock(mutex_);
    const auto now = Clock::now();
    const auto it = map_.find(key);
    if (it != map_.end()) {
        // Update existing entry in place and mark it most recently used.
        it->second->second.value = std::forward<V>(value);
        it->second->second.timestamp = now;
        item_list_.splice(item_list_.begin(), item_list_, it->second);
    } else {
        // Insert new entry at the front of the recency list.
        item_list_.emplace_front(std::forward<K>(key), CacheItem{ std::forward<V>(value), now });
        try {
            // The key is known to be absent (find() failed under the lock),
            // so emplace always inserts. Use the key stored in the list node,
            // since `key` itself may have been moved from.
            map_.emplace(item_list_.front().first, item_list_.begin());
        } catch (...) {
            // Roll back so no list node exists without a table entry.
            item_list_.pop_front();
            throw;
        }
    }
    evict_if_needed();
}

// Drop entries from the back (least recently used end) of the recency list
// until neither eviction condition holds:
//   * the list holds more than maxSize_ entries, or
//   * maxAge_ is positive and the back entry's age, truncated to whole
//     milliseconds, exceeds maxAge_.
// This function takes no lock itself; put_impl calls it with mutex_ held.
void evict_if_needed() {
    for (;;) {
        const bool over_capacity = item_list_.size() > maxSize_;

        // Only consult the clock when the size check alone does not already
        // force an eviction.
        bool expired = false;
        if (!over_capacity && maxAge_.count() > 0 && !item_list_.empty()) {
            const auto age = std::chrono::duration_cast<std::chrono::milliseconds>(
                Clock::now() - item_list_.back().second.timestamp);
            expired = age > maxAge_;
        }

        if (!over_capacity && !expired) {
            break;
        }

        // Remove the table entry first: the key lives inside the list node.
        map_.erase(item_list_.back().first);
        item_list_.pop_back();
    }
}

mutable std::mutex mutex_;
std::list<std::pair<Key, CacheItem>> item_list_;
std::unordered_map<Key, ListIt> map_;
size_t maxSize_;
std::chrono::milliseconds maxAge_;
};

#endif // LRU_CACHE_H
39 changes: 8 additions & 31 deletions src/not_implemented_exception.h
Original file line number Diff line number Diff line change
@@ -1,41 +1,18 @@
#ifndef NOT_IMPLEMENTED_EXCEPTION_H
#define NOT_IMPLEMENTED_EXCEPTION_H

#include <stdexcept>
#include <string>

class NotImplementedException : public std::logic_error
{
private:

std::string _text;

NotImplementedException(const char* message, const char* function)
:
std::logic_error("Not Implemented")
{
_text = message;
_text += " : ";
_text += function;
};

class NotImplementedException : public std::logic_error {
public:
explicit NotImplementedException(const std::string& message = "Not Implemented")
: std::logic_error(message) {}

NotImplementedException()
:
NotImplementedException("Not Implememented", __FUNCTION__)
{
}

NotImplementedException(const char* message)
:
NotImplementedException(message, __FUNCTION__)
{
}

virtual const char *what() const throw()
{
return _text.c_str();
// Use noexcept-qualified what() override for compatibility and clarity
const char* what() const noexcept override {
return std::logic_error::what();
}
};

#endif //NOT_IMPLEMENTED_EXCEPTION_H
#endif // NOT_IMPLEMENTED_EXCEPTION_H
Loading