Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ perf.data.old
perf.out
ndb.svg
flamegraph.html
testdata/sg_test_db
testdata/many-events.json
testdata/db/ndb-v0.tar
testdata/db/v0
Expand All @@ -39,3 +40,4 @@ ndb

/target
/Cargo.lock
test_socialgraph
18 changes: 14 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ CFLAGS = -Wall -Wno-misleading-indentation -Wno-unused-function -Werror -O2 -g -
BOLT11_HDRS := src/bolt11/amount.h src/bolt11/bech32.h src/bolt11/bech32_util.h src/bolt11/bolt11.h src/bolt11/debug.h src/bolt11/error.h src/bolt11/hash_u5.h src/bolt11/node_id.h src/bolt11/overflows.h
CCAN_SRCS := ccan/ccan/utf8/utf8.c ccan/ccan/tal/tal.c ccan/ccan/tal/str/str.c ccan/ccan/list/list.c ccan/ccan/mem/mem.c ccan/ccan/crypto/sha256/sha256.c ccan/ccan/take/take.c
CCAN_HDRS := ccan/ccan/utf8/utf8.h ccan/ccan/container_of/container_of.h ccan/ccan/check_type/check_type.h ccan/ccan/str/str.h ccan/ccan/tal/str/str.h ccan/ccan/tal/tal.h ccan/ccan/list/list.h ccan/ccan/structeq/structeq.h ccan/ccan/typesafe_cb/typesafe_cb.h ccan/ccan/short_types/short_types.h ccan/ccan/mem/mem.h ccan/ccan/likely/likely.h ccan/ccan/alignof/alignof.h ccan/ccan/crypto/sha256/sha256.h ccan/ccan/array_size/array_size.h ccan/ccan/endian/endian.h ccan/ccan/take/take.h ccan/ccan/build_assert/build_assert.h ccan/ccan/cppmagic/cppmagic.h
HEADERS = deps/lmdb/lmdb.h deps/secp256k1/include/secp256k1.h src/nostrdb.h src/cursor.h src/hex.h src/jsmn.h src/config.h src/random.h src/memchr.h src/cpu.h src/nostr_bech32.h src/block.h src/str_block.h src/print_util.h $(C_BINDINGS) $(CCAN_HDRS) $(BOLT11_HDRS)
HEADERS = deps/lmdb/lmdb.h deps/secp256k1/include/secp256k1.h src/nostrdb.h src/cursor.h src/hex.h src/jsmn.h src/config.h src/random.h src/memchr.h src/cpu.h src/nostr_bech32.h src/block.h src/str_block.h src/print_util.h src/ndb_uid.h src/ndb_socialgraph.h src/bucketed_u32_list.h $(C_BINDINGS) $(CCAN_HDRS) $(BOLT11_HDRS)
FLATCC_SRCS=deps/flatcc/src/runtime/json_parser.c deps/flatcc/src/runtime/verifier.c deps/flatcc/src/runtime/builder.c deps/flatcc/src/runtime/emitter.c deps/flatcc/src/runtime/refmap.c
BOLT11_SRCS = src/bolt11/bolt11.c src/bolt11/bech32.c src/bolt11/amount.c src/bolt11/hash_u5.c
SRCS = src/nostrdb.c src/invoice.c src/nostr_bech32.c src/content_parser.c src/block.c src/binmoji.c src/metadata.c $(BOLT11_SRCS) $(FLATCC_SRCS) $(CCAN_SRCS)
SRCS = src/nostrdb.c src/invoice.c src/nostr_bech32.c src/content_parser.c src/block.c src/binmoji.c src/metadata.c src/ndb_uid.c src/ndb_socialgraph.c src/bucketed_u32_list.c $(BOLT11_SRCS) $(FLATCC_SRCS) $(CCAN_SRCS)
LDS = $(OBJS) $(ARS)
OBJS = $(SRCS:.c=.o)
DEPS = $(OBJS) $(HEADERS) $(ARS)
Expand All @@ -21,14 +21,15 @@ C_BINDINGS_COMMON=$(BINDINGS)/c/flatbuffers_common_builder.h $(BINDINGS)/c/flatb
C_BINDINGS=$(C_BINDINGS_COMMON) $(C_BINDINGS_PROFILE) $(C_BINDINGS_META)
BIN=ndb

SANFLAGS = -fsanitize=leak

# Detect operating system
UNAME_S := $(shell uname -s)

# macOS-specific flags
ifeq ($(UNAME_S),Darwin)
LDFLAGS += -framework Security
SANFLAGS =
else
SANFLAGS = -fsanitize=leak
endif

CHECKDATA=testdata/db/v0/data.mdb
Expand Down Expand Up @@ -191,11 +192,20 @@ testdata/db/.dir:
@mkdir -p testdata/db
touch testdata/db/.dir

testdata/sg_test_db/.dir:
@mkdir -p testdata/sg_test_db
touch testdata/sg_test_db/.dir

test: CFLAGS += $(SANFLAGS) # compile test objects with the sanitizers in SANFLAGS (LeakSanitizer; empty on macOS)
test: LDFLAGS += $(SANFLAGS) # link test binary with the matching sanitizer runtime
test: test.c $(DEPS) testdata/db/.dir
$(CC) $(CFLAGS) test.c $(LDS) $(LDFLAGS) -o $@

test_socialgraph: CFLAGS += $(SANFLAGS)
test_socialgraph: LDFLAGS += $(SANFLAGS)
test_socialgraph: test_socialgraph.c $(DEPS) testdata/sg_test_db/.dir
$(CC) $(CFLAGS) test_socialgraph.c $(LDS) $(LDFLAGS) -o $@

# Call this with CCAN_NEW="mod1 mod2..." to add new ccan modules.
update-ccan:
mv ccan ccan.old
Expand Down
88 changes: 88 additions & 0 deletions SOCIAL_GRAPH.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Social Graph

nostrdb includes an integrated social graph index that builds follow relationships from kind 3 (contact list) events and mute relationships from kind 10000 (mute list) events.

## Architecture

### UID Mapping

Pubkeys (32 bytes) are mapped to locally unique 32-bit integer IDs (UIDs) to reduce index size.

- **Type**: `uint32_t` (4.29 billion user capacity)
- **Storage savings**: 28 bytes per UID reference (32-byte pubkey → 4-byte UID)
- **Memory impact**: For 10M edges with bidirectional indices, saves ~560MB vs using full pubkeys (56 bytes per edge: 28 bytes in forward index + 28 bytes in reverse index)

If you need >4B users, change `typedef uint32_t ndb_uid_t` to `uint64_t` in `src/ndb_uid.h`.

**UID Allocation**: Counter-based (`next_id++`), reconstructed from max existing UID on init. Thread-safe within nostrdb's single-writer architecture. **Not safe for multi-process access**—running multiple nostrdb processes against the same database will cause UID collisions.

### Storage Strategy

**Forward indexes** (bounded by user behavior, typically 100-500 follows):
- Use **bucketed UID lists** - space-optimized arrays partitioned by UID size (u8/u16/u32)
- Key: follower UID → Value: bucketed array of followed UIDs
- Storage: ~1.7 bytes per edge (78% savings vs naive array)
- `is_following(A,B)`: O(log N) btree + O(log M) binary search in the u16/u32 buckets (the u8 bucket, at most 256 entries, is scanned linearly)
- `get_followed(A)`: O(log N) btree, returns assembled array
- Insert: O(M) rebuild entire array (acceptable for bounded M)

**Reverse indexes** (unbounded by popularity, can reach millions):
- Use **composite keys** to avoid O(n) rewrites for popular users
- Key: (followed_uid, follower_uid) → Value: empty
- Storage: ~8 bytes per edge (higher overhead but scalable)
- `get_followers(A)`: O(log N + M) cursor range scan
- `follower_count(A)`: O(1) cached counter (updated on follow/unfollow)
- Insert: O(log N) single key write (critical for viral accounts)

Rationale: Users control who they follow (typically a few hundred accounts), but can't control their follower count. Popular npubs with 1M+ followers need O(log n) insertion, not O(n) array rebuilds.

### Databases

**UID mapping (2 databases):**
- `uid_str_to_id`: 32-byte pubkey → UID (btree)
- `uid_id_to_str`: UID → 32-byte pubkey (btree)

**Follow graph (6 databases):**
- `sg_followed_by_user`: UID → bucketed_list<UID> (forward index - who you follow)
- `sg_followers_by_user`: (followed_uid, follower_uid) → empty (reverse index composite key)
- `sg_follower_count`: UID → u32 (cached follower count for O(1) queries)
- `sg_follow_distance`: UID → u32 distance from root user
- `sg_users_by_follow_distance`: (distance, UID) → empty (composite key index for distance queries)
- `sg_follow_list_created_at`: UID → u64 timestamp (prevents stale contact list processing)

**Mute graph (4 databases):**
- `sg_muted_by_user`: UID → bucketed_list<UID> (forward index - who you mute)
- `sg_user_muted_by`: (muted_uid, muter_uid) → empty (reverse index composite key)
- `sg_muter_count`: UID → u32 (cached muter count for O(1) queries)
- `sg_mute_list_created_at`: UID → u64 timestamp (prevents stale mute list processing)

All composite key indexes use LMDB's natural key ordering for efficient prefix scans. Counter databases provide O(1) follower/muter counts without cursor iteration.

## Usage

Contact lists (kind 3) and mute lists (kind 10000) are automatically processed during event ingestion. Query the graph via:

```c
struct ndb_txn txn;
ndb_begin_query(ndb, &txn);

// Follow graph queries
uint32_t distance = ndb_socialgraph_get_follow_distance(&txn, ndb, pubkey);
int follows = ndb_socialgraph_is_following(&txn, ndb, follower_pk, followed_pk);
int follower_count = ndb_socialgraph_follower_count(&txn, ndb, pubkey);
int followed_count = ndb_socialgraph_followed_count(&txn, ndb, pubkey);
int n_followed = ndb_socialgraph_get_followed(&txn, ndb, pubkey, followed_out, max_out);
int n_followers = ndb_socialgraph_get_followers(&txn, ndb, pubkey, followers_out, max_out);

// Mute list queries
int mutes = ndb_socialgraph_is_muting(&txn, ndb, muter_pk, muted_pk);
int n_muted = ndb_socialgraph_get_muted(&txn, ndb, pubkey, muted_out, max_out);
int n_muters = ndb_socialgraph_get_muters(&txn, ndb, pubkey, muters_out, max_out);
int muter_count = ndb_socialgraph_muter_count(&txn, ndb, pubkey);

ndb_end_query(&txn);
```

## Implementation

Based on [nostr-social-graph](https://github.com/mmalmi/nostr-social-graph).
245 changes: 245 additions & 0 deletions src/bucketed_u32_list.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
#include "bucketed_u32_list.h"
#include <stdlib.h>
#include <string.h>

// Binary search helpers
/*
 * Membership test on an ascending array of 16-bit values.
 *
 * @arr:   sorted array to probe
 * @count: number of elements in @arr
 * @val:   value to look for
 *
 * Returns 1 if @val occurs in arr[0..count), 0 otherwise.
 */
static int bsearch_u16(const uint16_t *arr, uint32_t count, uint16_t val)
{
	uint32_t lo = 0;
	uint32_t hi = count;

	/* Invariant: if val is present, its index lies in [lo, hi). */
	while (hi > lo) {
		const uint32_t mid = (lo + hi) / 2;
		const uint16_t probe = arr[mid];

		if (probe < val) {
			lo = mid + 1;
		} else if (probe > val) {
			hi = mid;
		} else {
			return 1;
		}
	}

	return 0;
}

/*
 * Membership test on an ascending array of 32-bit values.
 *
 * @arr:   sorted array to probe
 * @count: number of elements in @arr
 * @val:   value to look for
 *
 * Returns 1 if @val occurs in arr[0..count), 0 otherwise.
 */
static int bsearch_u32(const uint32_t *arr, uint32_t count, uint32_t val)
{
	uint32_t lo = 0;
	uint32_t hi = count;

	/* Invariant: if val is present, its index lies in [lo, hi). */
	while (hi > lo) {
		const uint32_t mid = (lo + hi) / 2;
		const uint32_t probe = arr[mid];

		if (probe < val) {
			lo = mid + 1;
		} else if (probe > val) {
			hi = mid;
		} else {
			return 1;
		}
	}

	return 0;
}

/*
 * Binary-search a packed, ascending bucket of fixed-width unsigned integers.
 *
 * @bytes: first byte of the bucket; does not have to be aligned for the
 *         element type
 * @count: number of elements in the bucket
 * @width: element size in bytes; sizeof(uint16_t) selects 16-bit elements,
 *         anything else is treated as 32-bit
 * @val:   value to look for
 *
 * Each element is loaded with memcpy rather than through a uint16_t or
 * uint32_t pointer, so a bucket that starts on an odd address is read
 * without a misaligned access.
 *
 * Returns 1 if @val is present, 0 otherwise.
 */
static int uid_bucket_bsearch(const unsigned char *bytes, uint32_t count,
			      size_t width, uint32_t val)
{
	uint32_t lo = 0, hi = count;

	while (lo < hi) {
		uint32_t mid = lo + (hi - lo) / 2;
		const unsigned char *slot = bytes + (size_t)mid * width;
		uint32_t cur;

		if (width == sizeof(uint16_t)) {
			uint16_t narrow;
			memcpy(&narrow, slot, sizeof(narrow));
			cur = narrow;
		} else {
			memcpy(&cur, slot, sizeof(cur));
		}

		if (cur == val)
			return 1;
		if (cur < val)
			lo = mid + 1;
		else
			hi = mid;
	}
	return 0;
}

/*
 * Test whether @uid is stored in @list.
 *
 * The UID's magnitude selects the bucket to search: values up to 0xFF live
 * in the u8 bucket at the start of list->data, values up to 0xFFFF in the
 * u16 bucket starting at u8_offset, everything else in the u32 bucket
 * starting at u16_offset.
 *
 * The u16 and u32 buckets begin at byte offsets that depend on how many
 * smaller entries precede them, so they are not guaranteed to be aligned
 * for their element type. They are therefore searched with memcpy-based
 * loads instead of the typed bsearch helpers.
 *
 * NOTE(review): assumes list->data is a byte-typed array, i.e. the offsets
 * are byte offsets - confirm in bucketed_u32_list.h.
 *
 * Returns 1 if present, 0 if absent, the list is empty, or @list is NULL.
 */
int uid_list_contains(struct uid_list *list, ndb_uid_t uid)
{
	if (!list || list->count == 0)
		return 0;

	if (uid <= 0xFF) {
		/* One byte per entry: a plain byte scan is all that is needed. */
		return memchr(list->data, (int)(uint8_t)uid, list->u8_offset) != NULL;
	}

	if (uid <= 0xFFFF) {
		const unsigned char *u16_bytes =
			(const unsigned char *)(list->data + list->u8_offset);
		uint32_t u16_count = (list->u16_offset - list->u8_offset) / sizeof(uint16_t);

		return uid_bucket_bsearch(u16_bytes, u16_count, sizeof(uint16_t), (uint32_t)uid);
	}

	const unsigned char *u32_bytes =
		(const unsigned char *)(list->data + list->u16_offset);
	uint32_t u32_count = (list->u32_offset - list->u16_offset) / sizeof(uint32_t);

	return uid_bucket_bsearch(u32_bytes, u32_count, sizeof(uint32_t), (uint32_t)uid);
}

/*
 * Allocate an empty uid_list.
 *
 * @capacity: number of UIDs to reserve room for; the payload is sized at
 *            four bytes per UID.
 *
 * The header fields (count and the three bucket offsets) are zeroed; the
 * payload bytes are left uninitialised.
 *
 * Returns the new list, or NULL if malloc fails. The caller owns the result
 * and releases it with free().
 */
struct uid_list *uid_list_create(uint32_t capacity)
{
	struct uid_list *list;

	list = malloc(sizeof(struct uid_list) + capacity * sizeof(uint32_t));
	if (list == NULL)
		return NULL;

	list->u32_offset = 0;
	list->u16_offset = 0;
	list->u8_offset = 0;
	list->count = 0;

	return list;
}

/* qsort() comparator: orders ndb_uid_t values ascending. */
static int uid_cmp_asc(const void *pa, const void *pb)
{
	const ndb_uid_t a = *(const ndb_uid_t *)pa;
	const ndb_uid_t b = *(const ndb_uid_t *)pb;

	/* Avoids subtraction, which could wrap for unsigned operands. */
	return (a > b) - (a < b);
}

/*
 * Repack list->data from a flat array of UIDs.
 *
 * @list:      list whose count field already holds the number of entries in
 *             @temp_uids; its data area must have room for the packed result
 *             (at most 4 bytes per UID)
 * @temp_uids: the UIDs to store; sorted in place by this function
 *
 * UIDs are sorted ascending and then written into three consecutive
 * buckets: one byte each for values <= 0xFF, two bytes each for values
 * <= 0xFFFF, four bytes each for the rest. u8_offset, u16_offset and
 * u32_offset are set to the byte offset at which the u8, u16 and u32
 * bucket respectively ends.
 *
 * The wider buckets start wherever the previous bucket ends, which is not
 * necessarily aligned for uint16_t/uint32_t, so elements are stored with
 * memcpy. The resulting bytes are the same as a native typed store.
 */
static void uid_list_rebuild_buckets(struct uid_list *list, ndb_uid_t *temp_uids)
{
	if (list->count == 0) {
		list->u8_offset = 0;
		list->u16_offset = 0;
		list->u32_offset = 0;
		return;
	}

	// Sort UIDs
	qsort(temp_uids, list->count, sizeof(*temp_uids), uid_cmp_asc);

	// Count buckets
	uint32_t u8_count = 0, u16_count = 0, u32_count = 0;
	for (uint32_t i = 0; i < list->count; i++) {
		if (temp_uids[i] <= 0xFF)
			u8_count++;
		else if (temp_uids[i] <= 0xFFFF)
			u16_count++;
		else
			u32_count++;
	}

	// Pack into buckets
	unsigned char *u8_dst = (unsigned char *)list->data;
	unsigned char *u16_dst = (unsigned char *)(list->data + u8_count);
	unsigned char *u32_dst = (unsigned char *)(list->data + u8_count + u16_count * 2);

	for (uint32_t i = 0; i < list->count; i++) {
		if (temp_uids[i] <= 0xFF) {
			*u8_dst++ = (unsigned char)temp_uids[i];
		} else if (temp_uids[i] <= 0xFFFF) {
			uint16_t v16 = (uint16_t)temp_uids[i];
			memcpy(u16_dst, &v16, sizeof(v16));
			u16_dst += sizeof(v16);
		} else {
			uint32_t v32 = (uint32_t)temp_uids[i];
			memcpy(u32_dst, &v32, sizeof(v32));
			u32_dst += sizeof(v32);
		}
	}

	list->u8_offset = u8_count;
	list->u16_offset = u8_count + u16_count * 2;
	list->u32_offset = u8_count + u16_count * 2 + u32_count * 4;
}

/*
 * Insert @uid into the list, growing the allocation when necessary.
 *
 * @list_ptr: in/out pointer to the list; updated if realloc moves it
 * @capacity: in/out number of UIDs the allocation has room for (4 bytes
 *            each); updated when the list grows
 * @uid:      UID to insert
 *
 * The existing buckets are unpacked into a temporary flat array, @uid is
 * appended, and the buckets are rebuilt from that array.
 *
 * Growth doubles the capacity but always reaches at least count + 1, so a
 * list created with capacity 0 still gains room for the new element.
 * Bucket elements are read with memcpy because the u16/u32 buckets are not
 * guaranteed to be aligned for their element type.
 *
 * Returns 1 if @uid is in the list on return (inserted or already
 * present). Returns 0 on allocation failure, if the capacity cannot be
 * represented, or if the bucket offsets do not add up to list->count; in
 * those cases the list contents are unchanged.
 */
int uid_list_add(struct uid_list **list_ptr, uint32_t *capacity, ndb_uid_t uid)
{
	struct uid_list *list = *list_ptr;

	if (uid_list_contains(list, uid))
		return 1;

	const uint32_t u8_count = list->u8_offset;
	const uint32_t u16_count = (list->u16_offset - list->u8_offset) / sizeof(uint16_t);
	const uint32_t u32_count = (list->u32_offset - list->u16_offset) / sizeof(uint32_t);

	/* Refuse to unpack a header whose buckets disagree with count;
	 * doing so would overrun the temporary array below. */
	if ((uint64_t)u8_count + u16_count + u32_count != list->count)
		return 0;

	if (list->count >= *capacity) {
		uint64_t wanted = (uint64_t)*capacity * 2;
		struct uid_list *grown;

		/* Doubling 0 stays 0; make sure the new element fits. */
		if (wanted <= list->count)
			wanted = (uint64_t)list->count + 1;

		if (wanted > UINT32_MAX ||
		    wanted > (SIZE_MAX - sizeof(struct uid_list)) / sizeof(uint32_t))
			return 0;

		grown = realloc(list, sizeof(struct uid_list) + (size_t)wanted * sizeof(uint32_t));
		if (!grown)
			return 0;

		*list_ptr = grown;
		*capacity = (uint32_t)wanted;
		list = grown;
	}

	ndb_uid_t *temp_uids = malloc(((size_t)list->count + 1) * sizeof(*temp_uids));
	if (!temp_uids)
		return 0;

	uint32_t idx = 0;

	const unsigned char *u8_src = (const unsigned char *)list->data;
	for (uint32_t i = 0; i < u8_count; i++)
		temp_uids[idx++] = u8_src[i];

	const unsigned char *u16_src = (const unsigned char *)(list->data + list->u8_offset);
	for (uint32_t i = 0; i < u16_count; i++) {
		uint16_t v16;
		memcpy(&v16, u16_src + (size_t)i * sizeof(v16), sizeof(v16));
		temp_uids[idx++] = v16;
	}

	const unsigned char *u32_src = (const unsigned char *)(list->data + list->u16_offset);
	for (uint32_t i = 0; i < u32_count; i++) {
		uint32_t v32;
		memcpy(&v32, u32_src + (size_t)i * sizeof(v32), sizeof(v32));
		temp_uids[idx++] = v32;
	}

	temp_uids[list->count] = uid;
	list->count++;

	uid_list_rebuild_buckets(list, temp_uids);

	free(temp_uids);
	return 1;
}

/*
 * Number of bytes occupied by @list: the fixed header plus the packed
 * payload, whose length is the end offset of the u32 bucket.
 */
size_t uid_list_size(struct uid_list *list)
{
	const size_t payload_bytes = list->u32_offset;

	return payload_bytes + sizeof(struct uid_list);
}

/*
 * Fetch the UID at logical position @index.
 *
 * Positions run through the u8 bucket first, then the u16 bucket, then the
 * u32 bucket.
 *
 * u16/u32 elements are read with memcpy: those buckets start at byte
 * offsets that are not guaranteed to be aligned for their element type, so
 * dereferencing a typed pointer into them would be a misaligned access.
 *
 * Returns the UID, or 0 when @index >= list->count. A stored UID of 0 is
 * indistinguishable from that out-of-range result.
 */
ndb_uid_t uid_list_get(struct uid_list *list, uint32_t index)
{
	if (index >= list->count)
		return 0;

	uint32_t u8_count = list->u8_offset;
	uint32_t u16_count = (list->u16_offset - list->u8_offset) / sizeof(uint16_t);

	if (index < u8_count) {
		const uint8_t *u8_data = (const uint8_t *)list->data;
		return u8_data[index];
	}

	if (index < u8_count + u16_count) {
		const unsigned char *u16_bytes =
			(const unsigned char *)(list->data + list->u8_offset);
		uint16_t v16;

		memcpy(&v16, u16_bytes + (size_t)(index - u8_count) * sizeof(v16), sizeof(v16));
		return v16;
	}

	const unsigned char *u32_bytes =
		(const unsigned char *)(list->data + list->u16_offset);
	uint32_t v32;

	memcpy(&v32, u32_bytes + (size_t)(index - u8_count - u16_count) * sizeof(v32), sizeof(v32));
	return v32;
}

/*
 * Delete the entry at logical position @index.
 *
 * Every other entry is copied into a temporary array, count is decremented
 * and the buckets are rebuilt from that array.
 *
 * Does nothing when @index is out of range or when the temporary array
 * cannot be allocated; the caller gets no indication of either case.
 */
void uid_list_remove_at(struct uid_list *list, uint32_t index)
{
	ndb_uid_t *survivors;
	uint32_t kept = 0;
	uint32_t pos;

	if (index >= list->count)
		return;

	survivors = malloc(list->count * sizeof(ndb_uid_t));
	if (survivors == NULL)
		return;

	for (pos = 0; pos < list->count; pos++) {
		if (pos == index)
			continue;	/* the entry being removed */
		survivors[kept] = uid_list_get(list, pos);
		kept++;
	}

	list->count -= 1;
	uid_list_rebuild_buckets(list, survivors);

	free(survivors);
}

/*
 * Report whether @list carries the legacy marker: returns 1 when its
 * u8_offset field equals LEGACY_UID_LIST_MARKER, 0 otherwise.
 */
int uid_list_is_legacy(const struct uid_list *list)
{
	if (list->u8_offset != LEGACY_UID_LIST_MARKER)
		return 0;

	return 1;
}

struct uid_list *uid_list_from_legacy(const struct uid_list *legacy)
{
const ndb_uid_t *legacy_uids = (const ndb_uid_t*)(legacy->data);

size_t size = sizeof(struct uid_list) + legacy->count * sizeof(uint32_t);
struct uid_list *list = malloc(size);
if (!list)
return NULL;

list->count = 0;
list->u8_offset = 0;
list->u16_offset = 0;
list->u32_offset = 0;

ndb_uid_t *temp_uids = malloc(legacy->count * sizeof(ndb_uid_t));
if (!temp_uids) {
free(list);
return NULL;
}

memcpy(temp_uids, legacy_uids, legacy->count * sizeof(ndb_uid_t));
list->count = legacy->count;
uid_list_rebuild_buckets(list, temp_uids);

free(temp_uids);
return list;
}
Loading