Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions dissect/database/chromium/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from __future__ import annotations

from dissect.database.chromium.cache import DiskCache, SimpleDiskCache

# Public names of this package: the two classes imported above from
# dissect.database.chromium.cache.
__all__ = [
    "DiskCache",
    "SimpleDiskCache",
]
13 changes: 13 additions & 0 deletions dissect/database/chromium/cache/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from __future__ import annotations

from dissect.database.chromium.cache.c_cache import c_cache
from dissect.database.chromium.cache.c_simple import c_simple
from dissect.database.chromium.cache.cache import DiskCache
from dissect.database.chromium.cache.simple import SimpleDiskCache

# Public names of this package: the two cache classes plus the c_cache and
# c_simple objects imported above from the sibling modules.
__all__ = [
    "DiskCache",
    "SimpleDiskCache",
    "c_cache",
    "c_simple",
]
154 changes: 154 additions & 0 deletions dissect/database/chromium/cache/c_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
from __future__ import annotations

from dissect.cstruct import cstruct

# C-style definitions (enums, #define constants and structs) that are handed to
# dissect.cstruct at the bottom of this block. The text inside the string is
# parsed at import time, so treat it as code: changing a field type, array
# length or field order changes how structures are read.
#
# References:
# - https://chromium.googlesource.com/chromium/src/+/HEAD/net/disk_cache/blockfile/addr.h
# - https://chromium.googlesource.com/chromium/src/+/HEAD/net/disk_cache/blockfile/disk_format_base.h
# - https://chromium.googlesource.com/chromium/src/+/HEAD/net/disk_cache/blockfile/disk_format.h
cache_def = """

/* Cache Address format. */

enum FileType {
EXTERNAL = 0,
RANKINGS = 1,
BLOCK_256 = 2,
BLOCK_1K = 3,
BLOCK_4K = 4,
BLOCK_FILES = 5,
BLOCK_ENTRIES = 6,
BLOCK_EVICTED = 7
};

// int kMaxBlockSize = 4096 * 4;
// int16_t kMaxBlockFile = 255;
// int kMaxNumBlocks = 4;
// int16_t kFirstAdditionalBlockFile = 4;

#define kInitializedMask 0x80000000
#define kFileTypeMask 0x70000000
#define kFileTypeOffset 28
#define kReservedBitsMask 0x0c000000
#define kNumBlocksMask 0x03000000
#define kNumBlocksOffset 24
#define kFileSelectorMask 0x00ff0000
#define kFileSelectorOffset 16
#define kStartBlockMask 0x0000FFFF
#define kFileNameMask 0x0FFFFFFF

/* Cache types. */

/* Index file format. */
typedef uint32_t CacheAddr;

struct LruData {
int32 padding_1[2];
int32 filled; // Flag to tell when we filled the cache.
int32 sizes[5];
CacheAddr heads[5];
CacheAddr tails[5];
CacheAddr transaction; // In-flight operation target.
int32 operation; // Actual in-flight operation.
int32 operation_list; // In-flight operation list.
int32 padding_2[7];
};

struct IndexHeader {
uint32 magic; // 0xc3ca03c1
uint32 version;
int32 num_entries;
int32 num_bytes_legacy;
int32 last_file; // f_######
int32 dirty_flag;
CacheAddr stats;
int32 table_len;
int32 crash_flag;
int32 experiment_flag;
uint64 create_time;
int64 num_bytes;
int32 corruption_flag;
int32 padding[49];
LruData lru_data;
// CacheAddr table[table_len]; // max is kIndexTablesize (0x10000)
};

/* Data Block File Format. */
#define kBlockHeaderSize 8192

struct BlockFileHeader {
uint32 magic; // 0xc3ca04c1
uint32 version;
int16 this_file; // Index of this file (data_#).
int16 next_file; // Next file when this one is full (data_#).
int32 entry_size; // Size of the blocks of this file.
int32 num_entries; // Number of stored entries.
int32 max_entries; // Current maximum number of entries.
int32 empty[4];
int32 hints[4];
int32 updating;
int32 user[5];
// char allocation_map[4 * 2028];
// total header should be exactly kBlockHeaderSize bytes long (8192).
};

/* Cache Entry Format. */

enum EntryState {
ENTRY_NORMAL = 0,
ENTRY_EVICTED, // The entry was recently evicted from the cache.
ENTRY_DOOMED // The entry was doomed.
};

enum EntryFlags {
PARENT_ENTRY = 1, // This entry has children (sparse) entries.
CHILD_ENTRY = 1 << 1 // Child entry that stores sparse data.
};

struct EntryStore {
uint32 hash; // Full hash of the key.
CacheAddr next; // Next entry with the same hash or bucket.
CacheAddr rankings_node; // Rankings node for this entry.
int32 reuse_count; // How often is this entry used.
int32 refetch_count; // How often is this fetched from the net.
int32 state; // Current state.
uint64 creation_time;
int32 key_len;
CacheAddr long_key; // Optional address of a long key.

int32 data_size[4]; // We can store up to 4 data streams for
CacheAddr data_addr[4]; // each entry.

uint32 flags; // Any combination of EntryFlags.
int32 padding[4];
uint32 self_hash; // The hash of EntryStore up to this point.
char key[256 - 24 * 4]; // null terminated
};
"""

# Load the definitions once, at import time. endian="<" requests little-endian
# byte order for the parsed types. The enums, constants and structs declared in
# cache_def are then reachable as attributes of c_cache (the accompanying
# c_cache.pyi stub lists them).
c_cache = cstruct(endian="<").load(cache_def)


def BlockSizeForFileType(file_type: int) -> int:
    """Return the block size associated with a cache address file type.

    The numeric codes correspond to the ``FileType`` enum values declared in
    ``cache_def``. The sizes are presumably expressed in bytes.

    Args:
        file_type: Numeric file type code (1 through 7).

    Returns:
        The block size for ``file_type``.

    Raises:
        ValueError: If ``file_type`` matches none of the codes 1 through 7.
            This includes 0 (``EXTERNAL``), which has no entry here.
    """
    # Ordered (code, size) pairs; compared with ``==`` so anything that compares
    # equal to the integer code is accepted.
    size_by_type = (
        (1, 36),  # RANKINGS
        (2, 256),  # BLOCK_256
        (3, 1024),  # BLOCK_1K
        (4, 4096),  # BLOCK_4K
        (5, 8),  # BLOCK_FILES
        (6, 104),  # BLOCK_ENTRIES
        (7, 48),  # BLOCK_EVICTED
    )

    for type_code, block_size in size_by_type:
        if file_type == type_code:
            return block_size

    raise ValueError(f"Unknown file_type {file_type!r}")
170 changes: 170 additions & 0 deletions dissect/database/chromium/cache/c_cache.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Generated by cstruct-stubgen
from typing import BinaryIO, Literal, TypeAlias, overload

import dissect.cstruct as __cs__

# Typing stub for the cstruct object defined in c_cache.py: it declares the
# constants, enums, typedef and structures from that module's definition string
# as class attributes.
# NOTE(review): the file header says this stub is generated by cstruct-stubgen,
# so hand-written comments here may be lost when it is regenerated.
class _c_cache(__cs__.cstruct):
    # Cache address bit masks and shift offsets, as decimal literals.
    kInitializedMask: Literal[2147483648] = ...  # 0x80000000
    kFileTypeMask: Literal[1879048192] = ...  # 0x70000000
    kFileTypeOffset: Literal[28] = ...
    kReservedBitsMask: Literal[201326592] = ...  # 0x0c000000
    kNumBlocksMask: Literal[50331648] = ...  # 0x03000000
    kNumBlocksOffset: Literal[24] = ...
    kFileSelectorMask: Literal[16711680] = ...  # 0x00ff0000
    kFileSelectorOffset: Literal[16] = ...
    kStartBlockMask: Literal[65535] = ...  # 0x0000FFFF
    kFileNameMask: Literal[268435455] = ...  # 0x0FFFFFFF
    # File type codes of a cache address.
    class FileType(__cs__.Enum):
        EXTERNAL = ...
        RANKINGS = ...
        BLOCK_256 = ...
        BLOCK_1K = ...
        BLOCK_4K = ...
        BLOCK_FILES = ...
        BLOCK_ENTRIES = ...
        BLOCK_EVICTED = ...

    # CacheAddr is a plain uint32 typedef, so CacheAddr fields below are typed as uint32.
    CacheAddr: TypeAlias = _c_cache.uint32
    # LRU bookkeeping structure; it is the type of IndexHeader.lru_data.
    class LruData(__cs__.Structure):
        padding_1: __cs__.Array[_c_cache.int32]
        filled: _c_cache.int32
        sizes: __cs__.Array[_c_cache.int32]
        heads: __cs__.Array[_c_cache.uint32]
        tails: __cs__.Array[_c_cache.uint32]
        transaction: _c_cache.uint32
        operation: _c_cache.int32
        operation_list: _c_cache.int32
        padding_2: __cs__.Array[_c_cache.int32]
        # Overload 1: build an instance from optional per-field values.
        @overload
        def __init__(
            self,
            padding_1: __cs__.Array[_c_cache.int32] | None = ...,
            filled: _c_cache.int32 | None = ...,
            sizes: __cs__.Array[_c_cache.int32] | None = ...,
            heads: __cs__.Array[_c_cache.uint32] | None = ...,
            tails: __cs__.Array[_c_cache.uint32] | None = ...,
            transaction: _c_cache.uint32 | None = ...,
            operation: _c_cache.int32 | None = ...,
            operation_list: _c_cache.int32 | None = ...,
            padding_2: __cs__.Array[_c_cache.int32] | None = ...,
        ): ...
        # Overload 2: build an instance from a bytes-like object or binary file handle.
        @overload
        def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ...

    # Header structure of the index file.
    class IndexHeader(__cs__.Structure):
        magic: _c_cache.uint32
        version: _c_cache.uint32
        num_entries: _c_cache.int32
        num_bytes_legacy: _c_cache.int32
        last_file: _c_cache.int32
        dirty_flag: _c_cache.int32
        stats: _c_cache.uint32
        table_len: _c_cache.int32
        crash_flag: _c_cache.int32
        experiment_flag: _c_cache.int32
        create_time: _c_cache.uint64
        num_bytes: _c_cache.int64
        corruption_flag: _c_cache.int32
        padding: __cs__.Array[_c_cache.int32]
        lru_data: _c_cache.LruData
        # Overload 1: build an instance from optional per-field values.
        @overload
        def __init__(
            self,
            magic: _c_cache.uint32 | None = ...,
            version: _c_cache.uint32 | None = ...,
            num_entries: _c_cache.int32 | None = ...,
            num_bytes_legacy: _c_cache.int32 | None = ...,
            last_file: _c_cache.int32 | None = ...,
            dirty_flag: _c_cache.int32 | None = ...,
            stats: _c_cache.uint32 | None = ...,
            table_len: _c_cache.int32 | None = ...,
            crash_flag: _c_cache.int32 | None = ...,
            experiment_flag: _c_cache.int32 | None = ...,
            create_time: _c_cache.uint64 | None = ...,
            num_bytes: _c_cache.int64 | None = ...,
            corruption_flag: _c_cache.int32 | None = ...,
            padding: __cs__.Array[_c_cache.int32] | None = ...,
            lru_data: _c_cache.LruData | None = ...,
        ): ...
        # Overload 2: build an instance from a bytes-like object or binary file handle.
        @overload
        def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ...

    # Fixed fields at the start of a data block file; the allocation map that
    # follows them is only mentioned in a comment in c_cache.py and is not declared here.
    class BlockFileHeader(__cs__.Structure):
        magic: _c_cache.uint32
        version: _c_cache.uint32
        this_file: _c_cache.int16
        next_file: _c_cache.int16
        entry_size: _c_cache.int32
        num_entries: _c_cache.int32
        max_entries: _c_cache.int32
        empty: __cs__.Array[_c_cache.int32]
        hints: __cs__.Array[_c_cache.int32]
        updating: _c_cache.int32
        user: __cs__.Array[_c_cache.int32]
        # Overload 1: build an instance from optional per-field values.
        @overload
        def __init__(
            self,
            magic: _c_cache.uint32 | None = ...,
            version: _c_cache.uint32 | None = ...,
            this_file: _c_cache.int16 | None = ...,
            next_file: _c_cache.int16 | None = ...,
            entry_size: _c_cache.int32 | None = ...,
            num_entries: _c_cache.int32 | None = ...,
            max_entries: _c_cache.int32 | None = ...,
            empty: __cs__.Array[_c_cache.int32] | None = ...,
            hints: __cs__.Array[_c_cache.int32] | None = ...,
            updating: _c_cache.int32 | None = ...,
            user: __cs__.Array[_c_cache.int32] | None = ...,
        ): ...
        # Overload 2: build an instance from a bytes-like object or binary file handle.
        @overload
        def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ...

    # Entry state values (normal, evicted, doomed).
    class EntryState(__cs__.Enum):
        ENTRY_NORMAL = ...
        ENTRY_EVICTED = ...
        ENTRY_DOOMED = ...

    # Values for the EntryStore.flags field.
    class EntryFlags(__cs__.Enum):
        PARENT_ENTRY = ...
        CHILD_ENTRY = ...

    # A cache entry record, including its inline key buffer.
    class EntryStore(__cs__.Structure):
        hash: _c_cache.uint32
        next: _c_cache.uint32
        rankings_node: _c_cache.uint32
        reuse_count: _c_cache.int32
        refetch_count: _c_cache.int32
        state: _c_cache.int32
        creation_time: _c_cache.uint64
        key_len: _c_cache.int32
        long_key: _c_cache.uint32
        data_size: __cs__.Array[_c_cache.int32]
        data_addr: __cs__.Array[_c_cache.uint32]
        flags: _c_cache.uint32
        padding: __cs__.Array[_c_cache.int32]
        self_hash: _c_cache.uint32
        key: __cs__.CharArray
        # Overload 1: build an instance from optional per-field values.
        @overload
        def __init__(
            self,
            hash: _c_cache.uint32 | None = ...,
            next: _c_cache.uint32 | None = ...,
            rankings_node: _c_cache.uint32 | None = ...,
            reuse_count: _c_cache.int32 | None = ...,
            refetch_count: _c_cache.int32 | None = ...,
            state: _c_cache.int32 | None = ...,
            creation_time: _c_cache.uint64 | None = ...,
            key_len: _c_cache.int32 | None = ...,
            long_key: _c_cache.uint32 | None = ...,
            data_size: __cs__.Array[_c_cache.int32] | None = ...,
            data_addr: __cs__.Array[_c_cache.uint32] | None = ...,
            flags: _c_cache.uint32 | None = ...,
            padding: __cs__.Array[_c_cache.int32] | None = ...,
            self_hash: _c_cache.uint32 | None = ...,
            key: __cs__.CharArray | None = ...,
        ): ...
        # Overload 2: build an instance from a bytes-like object or binary file handle.
        @overload
        def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ...

# Technically `c_cache` is an instance of `_c_cache`, but then we can't use it in type hints
c_cache: TypeAlias = _c_cache
Loading