Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
A Dissect module implementing parsers for various database formats, including:

- Berkeley DB, used for example in older RPM databases
- Microsofts Extensible Storage Engine (ESE), used for example in Active Directory, Exchange and Windows Update
- Microsoft's Extensible Storage Engine (ESE), used for example in Active Directory, Exchange and Windows Update
- Google's LevelDB, used by browsers to store LocalStorage, SessionStorage and serialized IndexedDB databases
- SQLite3, commonly used by applications to store configuration data

For more information, please see [the documentation](https://docs.dissect.tools/en/latest/projects/dissect.database/index.html).
Expand Down
8 changes: 8 additions & 0 deletions dissect/database/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
from __future__ import annotations

from dissect.database.bsd.db import DB
from dissect.database.chromium.localstorage.localstorage import LocalStorage
from dissect.database.chromium.sessionstorage.sessionstorage import SessionStorage
from dissect.database.ese.ese import ESE
from dissect.database.exception import Error
from dissect.database.indexeddb.indexeddb import IndexedDB
from dissect.database.leveldb.leveldb import LevelDB
from dissect.database.sqlite3.sqlite3 import SQLite3

# Public API of this package: every name imported above is re-exported here.
__all__ = [
"DB",
"ESE",
"Error",
"IndexedDB",
"LevelDB",
"LocalStorage",
"SQLite3",
"SessionStorage",
]
9 changes: 9 additions & 0 deletions dissect/database/chromium/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from __future__ import annotations

from dissect.database.chromium.localstorage.localstorage import LocalStorage
from dissect.database.chromium.sessionstorage.sessionstorage import SessionStorage

# Public API of this package: the two storage parsers imported above.
__all__ = [
"LocalStorage",
"SessionStorage",
]
12 changes: 12 additions & 0 deletions dissect/database/chromium/localstorage/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from __future__ import annotations

from dissect.database.chromium.localstorage.c_localstorage import c_localstorage
from dissect.database.chromium.localstorage.localstorage import Key, LocalStorage, MetaKey, Store

# Public API of this package: the key/store classes and the cstruct definitions imported above.
__all__ = [
"Key",
"LocalStorage",
"MetaKey",
"Store",
"c_localstorage",
]
28 changes: 28 additions & 0 deletions dissect/database/chromium/localstorage/c_localstorage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from __future__ import annotations

from dissect.cstruct import cstruct

from dissect.database.util.protobuf import ProtobufVarint, ProtobufVarint32

# References:
# - https://github.com/chromium/chromium/blob/main/components/services/storage/dom_storage/local_storage_database.proto
#
# Structure definitions used to parse the values of LocalStorage metadata records.
# Every field is read as one ``uint8`` followed by one varint.
# NOTE(review): the ``*_type`` bytes are presumably the protobuf field tags (field number and
# wire type) preceding each varint -- confirm against the referenced .proto file.
localstorage_def = """
struct LocalStorageAreaWriteMetaData {
uint8 lm_type;
varint last_modified;

uint8 sb_type;
varint size_bytes;
};

struct LocalStorageAreaAccessMetaData {
uint8 la_type;
varint last_accessed;
};
"""

# The custom varint types must be registered before ``load()`` so the definition above can use them.
# ``varint`` and ``varint64`` both map to ``ProtobufVarint``; ``varint32`` maps to ``ProtobufVarint32``.
# Only ``varint`` is referenced by ``localstorage_def``.
c_localstorage = cstruct()
c_localstorage.add_custom_type("varint", ProtobufVarint, size=None, alignment=1, signed=False)
c_localstorage.add_custom_type("varint64", ProtobufVarint, size=None, alignment=1, signed=False)
c_localstorage.add_custom_type("varint32", ProtobufVarint32, size=None, alignment=1, signed=False)
c_localstorage.load(localstorage_def)
32 changes: 32 additions & 0 deletions dissect/database/chromium/localstorage/c_localstorage.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Generated by cstruct-stubgen
from typing import BinaryIO, Literal, TypeAlias, overload

import dissect.cstruct as __cs__

# Typing stub describing the ``c_localstorage`` cstruct instance: the three custom varint types
# and the two metadata structures, each constructible either from field values or from a buffer/stream.
class _c_localstorage(__cs__.cstruct):
class varint(__cs__.ProtobufVarint): ...

class varint64(__cs__.ProtobufVarint): ...

class varint32(__cs__.ProtobufVarint32): ...

class LocalStorageAreaWriteMetaData(__cs__.Structure):
lm_type: _c_localstorage.uint8
last_modified: _c_localstorage.varint
sb_type: _c_localstorage.uint8
size_bytes: _c_localstorage.varint
@overload
def __init__(self, lm_type: _c_localstorage.uint8 | None = ..., last_modified: _c_localstorage.varint | None = ..., sb_type: _c_localstorage.uint8 | None = ..., size_bytes: _c_localstorage.varint | None = ...): ...
@overload
def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ...

class LocalStorageAreaAccessMetaData(__cs__.Structure):
la_type: _c_localstorage.uint8
last_accessed: _c_localstorage.varint
@overload
def __init__(self, la_type: _c_localstorage.uint8 | None = ..., last_accessed: _c_localstorage.varint | None = ...): ...
@overload
def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ...

# Technically `c_localstorage` is an instance of `_c_localstorage`, but then we can't use it in type hints
c_localstorage: TypeAlias = _c_localstorage
213 changes: 213 additions & 0 deletions dissect/database/chromium/localstorage/localstorage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
from __future__ import annotations

from functools import cached_property
from typing import TYPE_CHECKING

from dissect.util.ts import webkittimestamp

from dissect.database.chromium.localstorage import c_localstorage
from dissect.database.leveldb.c_leveldb import c_leveldb
from dissect.database.leveldb.leveldb import LevelDB

if TYPE_CHECKING:
from pathlib import Path


class LocalStorage:
    """Reader for a Chromium LocalStorage LevelDB directory.

    Args:
        path: Directory containing the LevelDB files of the LocalStorage database.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        NotADirectoryError: If ``path`` exists but is not a directory.

    References:
        - https://www.cclsolutionsgroup.com/post/chromium-session-storage-and-local-storage
    """

    def __init__(self, path: Path):
        # Validate the location before handing it to the LevelDB parser.
        if not path.exists():
            raise FileNotFoundError(f"Provided path does not exist: {path!r}")
        if not path.is_dir():
            raise NotADirectoryError(f"Provided path is not a directory: {path!r}")

        self.path = path
        self.leveldb = LevelDB(path)

    def __repr__(self) -> str:
        store_count = len(self.stores)
        return f"<LocalStorage path='{self.path!s}' stores={store_count!r}>"

    @property
    def stores(self) -> list[Store]:
        """Return one :class:`Store` per distinct meta key found in the LevelDB records.

        Only records in the ``LIVE`` state whose key starts with ``META:`` or ``METAACCESS:`` are
        considered. Note that every access walks all LevelDB records again.
        """
        grouped = {}

        for record in self.leveldb.records:
            if record.state != c_leveldb.RecordState.LIVE:
                continue

            raw_key = record.key
            if raw_key.startswith(b"META:"):
                key_type = MetaKey
            elif raw_key.startswith(b"METAACCESS:"):
                key_type = MetaAccessKey
            else:
                continue

            entry = key_type(raw_key, record.value, record.state, record.sequence)
            if entry.key not in grouped:
                grouped[entry.key] = []
            grouped[entry.key].append(entry)

        return [Store(self, entries) for entries in grouped.values()]

    def store(self, key: str) -> Store | None:
        """Return the first store whose host equals ``key``, or ``None`` if there is none."""
        return next((candidate for candidate in self.stores if candidate.host == key), None)


class Store:
    """A group of LocalStorage records that share one host.

    Args:
        local_storage: The :class:`LocalStorage` this store was read from.
        meta: The meta keys found for this store; the ``key`` of the first one becomes ``host``.
    """

    def __init__(self, local_storage: LocalStorage, meta: list[MetaKey]):
        self.local_storage = local_storage

        self.host = meta[0].key

        # Keep the meta keys ordered by LevelDB sequence number (lowest first).
        ordered = list(meta)
        ordered.sort(key=lambda entry: entry.sequence)
        self.meta = ordered

    def __repr__(self) -> str:
        record_count = len(self.records)
        return f"<Store host={self.host!r} records={record_count!r}>"

    @property
    def records(self) -> list[RecordKey]:
        """Return every LevelDB record whose key belongs to this store, wrapped as :class:`RecordKey`."""

        # A record key such as "_https://google.com\x00\x01MyKey" belongs to the store
        # whose needle is "_https://google.com\x00".
        needle = RecordKey.prefix + self.host.encode("iso-8859-1") + b"\x00"

        matches = []
        for record in self.local_storage.leveldb.records:
            if record.key.startswith(needle):
                matches.append(RecordKey(self, record.key, record.value, record.state, record.sequence))
        return matches

    def get(self, key: str) -> RecordKey | None:
        """Return the first :class:`RecordKey` whose decoded key equals ``key``, or ``None``."""
        return next((candidate for candidate in self.records if candidate.key == key), None)


class Key:
    """Common base for the LocalStorage key classes.

    Subclasses provide ``prefix`` and are responsible for setting ``key`` and ``value``;
    this base class only stores the raw data and validates the key prefix.

    Args:
        raw_key: The undecoded LevelDB record key.
        raw_value: The undecoded LevelDB record value.
        state: The LevelDB record state.
        sequence: The LevelDB sequence number of the record.

    Raises:
        ValueError: If ``raw_key`` does not start with the class ``prefix``.
    """

    prefix: bytes
    state: c_leveldb.RecordState
    sequence: int
    key: str
    value: str

    def __init__(self, raw_key: bytes, raw_value: bytes, state: c_leveldb.RecordState, sequence: int):
        self._raw_key, self._raw_value = raw_key, raw_value
        self.state, self.sequence = state, sequence

        expected = self.prefix
        if raw_key.startswith(expected):
            return

        found = raw_key[: len(expected)]
        class_name = self.__class__.__name__
        raise ValueError(f"Invalid key prefix {found!r} for {class_name}: expected {expected!r}")

    def __repr__(self):
        fields = f"state={self.state!r} sequence={self.sequence!r} key={self.key!r} value={self.value!r}"
        return f"<{self.__class__.__name__} {fields}>"


class MetaKey(Key):
    """LocalStorage ``META:`` key.

    ``key`` is the raw key with the ``META:`` prefix removed, decoded as ISO-8859-1.
    ``value`` is the raw value parsed as a ``LocalStorageAreaWriteMetaData`` structure.
    """

    prefix: bytes = b"META:"
    value: c_localstorage.LocalStorageAreaWriteMetaData

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # ``Key.__init__`` has already verified that the raw key starts with ``prefix``.
        identifier = self._raw_key[len(self.prefix) :]
        self.key = identifier.decode("iso-8859-1")
        self.value = c_localstorage.LocalStorageAreaWriteMetaData(self._raw_value)


class MetaAccessKey(Key):
    """LocalStorage ``METAACCESS:`` key.

    ``key`` is the raw key with the ``METAACCESS:`` prefix removed, decoded as ISO-8859-1.
    ``value`` is the raw value parsed as a ``LocalStorageAreaAccessMetaData`` structure.

    References:
        - https://chromium-review.googlesource.com/c/chromium/src/+/5585301
    """

    prefix: bytes = b"METAACCESS:"
    value: c_localstorage.LocalStorageAreaAccessMetaData

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # ``Key.__init__`` has already verified that the raw key starts with ``prefix``.
        identifier = self._raw_key[len(self.prefix) :]
        self.key = identifier.decode("iso-8859-1")
        self.value = c_localstorage.LocalStorageAreaAccessMetaData(self._raw_value)


class RecordKey(Key):
    """Represents a LocalStorage record key.

    The raw LevelDB key consists of the ``_`` prefix, the host of the owning store, a NUL byte,
    one type byte and the encoded key name. The raw value consists of one type byte followed by
    the encoded value. A type byte of ``0x00`` selects UTF-16-LE, ``0x01`` selects ISO-8859-1.

    ``key`` and ``value`` are decoded on construction. Either is ``None`` when the corresponding
    buffer is empty or starts with an unknown type byte.

    Args:
        store: The :class:`Store` this record belongs to.
        raw_key: The undecoded LevelDB record key.
        raw_value: The undecoded LevelDB record value.
        state: The LevelDB record state.
        sequence: The LevelDB sequence number of the record.

    Raises:
        ValueError: If ``raw_key`` does not start with ``prefix``.
        UnicodeDecodeError: If the key or value bytes are not valid for the indicated encoding.
    """

    prefix: bytes = b"_"
    key: str | None
    value: str | None

    def __init__(self, store: Store, raw_key: bytes, raw_value: bytes, state: c_leveldb.RecordState, sequence: int):
        super().__init__(raw_key, raw_value, state, sequence)
        self.store = store

        # Populate ``key`` and ``value`` right away: ``__repr__`` and ``Store.get`` read them.
        self._decode_key()
        self._decode_value()

    @cached_property
    def meta(self) -> dict:
        """Calculate the metadata that likely belongs to this key.

        In a batch write action, meta keys are written first, followed by the records belonging to that batch.
        To identify a candidate meta key for this record key, we iterate over the meta keys for the store that
        this record key belongs to and choose the meta key(s) with the closest sequence number that is lower than
        the record key sequence number. This introduces a possible inaccuracy for the matched timestamp(s).

        The accuracy of these timestamps should be taken with a grain of salt when interpreting them. A latency of
        5 to 60 seconds was observed between a script requesting a write and the key data ending up on disk. The
        latency depends on several factors, such as how many write actions are happening at the time of write and
        the amount of writes per host (website) happening (this is limited to 60 per hour).

        The reader (you!) is invited to invent a smarter method to efficiently find metadata belonging to a record key.

        Returns:
            A dictionary with the keys ``created``, ``last_modified`` and ``last_accessed``; each value is a
            timestamp or ``None`` when no matching meta key was found.

        References:
            - local_storage_impl.cc
        """
        meta = {"created": None, "last_modified": None, "last_accessed": None}

        # ``Store.meta`` is sorted by sequence number, so we can stop at the first newer meta key.
        for meta_key in self.store.meta:
            if meta_key.sequence > self.sequence:
                break
            if meta_key.sequence == self.sequence:
                continue

            if hasattr(meta_key.value, "last_modified"):
                meta["last_modified"] = webkittimestamp(meta_key.value.last_modified)
                # The first (oldest) write timestamp doubles as the creation time.
                if not meta["created"]:
                    meta["created"] = meta["last_modified"]

            if hasattr(meta_key.value, "last_accessed"):
                meta["last_accessed"] = webkittimestamp(meta_key.value.last_accessed)
                # An access that predates the known creation time moves the creation time back.
                if not meta["created"] or meta["created"] > meta["last_accessed"]:
                    meta["created"] = meta["last_accessed"]

        return meta

    @staticmethod
    def _decode_string(buf: bytes) -> str | None:
        """Decode a type-prefixed string; return ``None`` for empty input or an unknown type byte."""
        if not buf:
            return None

        encoding = {0x00: "utf-16-le", 0x01: "iso-8859-1"}.get(buf[0])
        if encoding is None:
            return None

        return buf[1:].decode(encoding)

    def _decode_key(self) -> None:
        """Set ``key`` from the part of the raw key that follows the first NUL byte."""
        _, _, buf = self._raw_key.removeprefix(self.prefix).partition(b"\x00")
        self.key = self._decode_string(buf)

    def _decode_value(self) -> None:
        """Set ``value`` from the raw value."""
        self.value = self._decode_string(self._raw_value)
9 changes: 9 additions & 0 deletions dissect/database/chromium/sessionstorage/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from __future__ import annotations

from dissect.database.chromium.sessionstorage.sessionstorage import Namespace, Record, SessionStorage

__all__ = [
"Namespace",
"Record",
"SessionStorage",
]
Loading