From 38300ed86afa39b8b6aabc2ad764755862689701 Mon Sep 17 00:00:00 2001 From: vringar Date: Wed, 4 Feb 2026 20:35:59 +0100 Subject: [PATCH 1/2] docs: add AGENTS.md --- AGENTS.md | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..545349ede --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,116 @@ +# AGENTS.md + +This file provides guidance to AI coding agents when working with code in this repository. + +## Project Overview + +OpenWPM is a web privacy measurement framework for conducting large-scale privacy studies (thousands to millions of websites). Built on Firefox with Selenium automation, it captures HTTP traffic, JavaScript API calls, cookies, navigation, and DNS queries through a privileged WebExtension. + +## Build & Development Commands + +### Initial Setup +```bash +./install.sh # Creates conda env, installs Firefox, builds extension +``` + +### Running Tests +```bash +# Run all tests +pytest + +# Run specific test file +pytest test/test_storage.py -v + +# Run tests matching pattern +pytest -k "test_http" -v + +# Run tests by marker +pytest -m pyonly # Python-only tests (no browser) +pytest -m slow # Slow tests +``` + +### Linting & Formatting + +**Python:** +```bash +pre-commit run --all-files # Run all hooks +black . # Format Python +isort . 
# Sort imports +mypy openwpm # Type checking +``` + +**Extension (from Extension/ directory):** +```bash +npm run lint # ESLint + Prettier + web-ext lint +npm run fix # Auto-fix issues +npm run build # Rebuild extension (TypeScript → webpack → web-ext) +``` + +### Rebuilding Extension +```bash +scripts/build-extension.sh +# Or from Extension/: npm run build +``` + +### Updating Dependencies +```bash +scripts/repin.sh # Don't edit environment.yaml directly +``` + +## Architecture + +``` +TaskManager (orchestrator) +├── BrowserManagerHandle[] → BrowserManager (per-browser process) +│ └── Selenium WebDriver → Firefox + WebExtension +└── StorageController (isolated process) + ├── StructuredStorageProvider (SQLite/Parquet/S3/GCS) + └── UnstructuredStorageProvider (LevelDB/Gzip/S3/GCS) +``` + +### Core Components + +- **TaskManager** (`openwpm/task_manager.py`): Orchestrates browsers, manages command queues, runs watchdogs for crash recovery +- **BrowserManager** (`openwpm/browser_manager.py`): Wraps Selenium, executes commands, handles browser lifecycle +- **WebExtension** (`Extension/src/`): TypeScript extension collecting HTTP, cookies, JS calls, navigation via privileged browser APIs +- **Storage System** (`openwpm/storage/`): Receives data from extension via sockets, writes to configured backends +- **Commands** (`openwpm/commands/`): Extend `BaseCommand` with `execute()` method + +### Configuration + +- `ManagerParams`: Platform settings (num_browsers, data_directory, log_path) +- `BrowserParams`: Per-browser settings (instrumentation flags, display mode, profile handling) +- Both defined in `openwpm/config.py` with validation functions +- See [docs/Configuration.md](docs/Configuration.md) for full details + +### Known Issues + +- `callstack_instrument` is broken — enabling it raises `ConfigError`. See [#557](https://github.com/openwpm/OpenWPM/issues/557). 
+ +### Data Schema + +Schema files must be kept in sync: +- `openwpm/storage/schema.sql` (SQLite) +- `openwpm/storage/parquet_schema.py` (Parquet) + +## Scratch Space + +Use `datadir/` (project root) for any temporary or scratch data — crawl outputs, test databases, logs, etc. This directory is gitignored and is the conventional location for local data. Do not use `/tmp`, `~`, or other locations outside the project. + +`demo.py` defaults to writing here (`manager_params.data_directory = Path("./datadir/")`). The directory is created on demand. + +## Release Process + +When creating releases, PRs, or interacting with GitHub, prefer using the `gh` CLI over the web UI. + +## Key Files + +- `demo.py`: Reference implementation showing typical usage +- `custom_command.py`: Example of custom command implementation +- `test/manual_test.py`: Interactive debugging (`python -m test.manual_test --selenium`) + +## Display Modes + +- `native`: GUI visible (default) +- `headless`: Firefox headless (no X server needed) +- `xvfb`: X virtual framebuffer (full browser, no GUI, for servers) From f7b555e43061fe93838299de396bb7baba65b84a Mon Sep 17 00:00:00 2001 From: vringar Date: Fri, 20 Feb 2026 11:47:58 +0100 Subject: [PATCH 2/2] chore: add NixOS dev environment under nix/ --- nix/README.md | 13 ++++++ nix/shell.nix | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 nix/README.md create mode 100644 nix/shell.nix diff --git a/nix/README.md b/nix/README.md new file mode 100644 index 000000000..e29757409 --- /dev/null +++ b/nix/README.md @@ -0,0 +1,13 @@ +# Nix Development Environment + +**This is only intended for NixOS users.** If you are on a standard Linux distribution or macOS, follow the regular setup instructions in the project root README instead. + +NixOS cannot run the pre-built Firefox binaries that OpenWPM downloads without an FHS-compatible environment. The `shell.nix` in this directory provides that environment. 
+
+## Usage
+
+```sh
+nix-shell nix/shell.nix
+```
+
+This drops you into an FHS environment with the `openwpm` conda env already activated.
diff --git a/nix/shell.nix b/nix/shell.nix
new file mode 100644
index 000000000..077a7662b
--- /dev/null
+++ b/nix/shell.nix
@@ -0,0 +1,135 @@
+# Development shell for NixOS: wraps the toolchain in an FHS-compatible
+# environment and starts bash with the `openwpm` conda env activated.
+#
+# Usage: nix-shell nix/shell.nix
+#
+# Assumes conda is already installed under $HOME/.conda and that an env named
+# `openwpm` exists there; nothing in this file creates either.
+{ pkgs ? import <nixpkgs> {} }:
+
+let
+  # Packages made available inside the FHS environment (passed as targetPkgs).
+  packages = with pkgs; [
+    # Build essentials
+    stdenv.cc.cc.lib
+    gcc
+    gnumake
+    pkg-config
+    zlib
+    openssl
+
+    # Python build dependencies
+    libffi
+    readline
+    ncurses
+    bzip2
+    xz
+    sqlite
+
+    # Firefox dependencies
+    gtk3
+    glib
+    atk
+    gdk-pixbuf
+    pciutils
+    dbus-glib
+    libGL
+    libGLU
+    alsa-lib
+    libpulseaudio
+    ffmpeg
+    pango
+    cairo
+    freetype
+    fontconfig
+
+    # X11 libraries
+    libx11
+    libxext
+    libxrender
+    libxtst
+    libxi
+    libxcomposite
+    libxcursor
+    libxdamage
+    libxfixes
+    libxrandr
+    libxcb
+    xvfb
+
+    # Utilities
+    git
+    which
+    curl
+    wget
+    file
+    gnugrep
+    coreutils
+    bashInteractive
+
+    # Node.js for extension build
+    nodejs_22
+  ];
+
+  # Library search path (built by makeLibraryPath) exported in `profile` below.
+  # NOTE(review): libxi, libxcb, libGLU and ffmpeg are in `packages` but are
+  # not listed here — confirm that is intentional.
+  libraryPath = pkgs.lib.makeLibraryPath (with pkgs; [
+    stdenv.cc.cc.lib
+    zlib
+    libGL
+    glib
+    gtk3
+    atk
+    gdk-pixbuf
+    pciutils
+    alsa-lib
+    libpulseaudio
+    libx11
+    libxext
+    libxrender
+    libxtst
+    libxcomposite
+    libxcursor
+    libxdamage
+    libxfixes
+    libxrandr
+    dbus-glib
+    pango
+    cairo
+    freetype
+    fontconfig
+  ]);
+
+  fhsEnv = pkgs.buildFHSEnv {
+    name = "openwpm";
+    targetPkgs = _: packages;
+    # Shell snippet that exports the library path and puts conda on PATH.
+    profile = ''
+      # Append the old value only when it is non-empty: a trailing ":" is an
+      # empty entry, which the dynamic linker treats as the current directory.
+      export LD_LIBRARY_PATH="${libraryPath}''${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+      export PATH="$HOME/.conda/bin:$PATH"
+      source "$HOME/.conda/etc/profile.d/conda.sh"
+    '';
+    # Entry point: a bash session whose rc file activates the conda env.
+    runScript = let
+      condaInit = pkgs.writeText "conda-init.sh" ''
+        set -h # Enable command hashing
+        source "$HOME/.conda/etc/profile.d/conda.sh"
+        conda activate openwpm
+      '';
+    in pkgs.writeShellScript "openwpm-shell" ''
+      # BASH_ENV makes non-interactive bash children read the same init file.
+      export BASH_ENV="${condaInit}"
+      exec bash --rcfile "${condaInit}" "$@"
+    '';
+  };
+
+in pkgs.mkShell {
+  packages = [ fhsEnv ];
+  # Hand the shell over to the FHS wrapper (its `name` above is "openwpm").
+  shellHook = ''
+    exec openwpm
+  '';
+}