Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,14 @@ dist
**/.ipynb_checkpoints
.idea
.DS_Store
.firebase/*.cache
.firebase/
.firebase/*.cache
**/.DS_Store
test-results/
lib/
.env
.idea/
.vscode/
task-launcher/.venv/
task-launcher/data/
task-launcher/firestore-debug.log
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,38 @@ npm run dev
You can now locally run tasks e.g. TROG `http://localhost:8080/?task=trog`.
Task parameters are documented here (TODO linkme).

### Location Selection Save Debug (Emulator)

For testing location saves against the Firebase emulators:

1. Start the emulators from the functions repo (auth `9290`, functions `5005`, firestore `8185`, UI `4002`).
2. Open the task with debug save enabled:
`http://localhost:8080/?task=locationselection&locationSaveDebug=true`
3. Click **Save** and confirm a `locations` doc appears in the Emulator UI.

### Kontur Population Cache

The population lookup uses a local Kontur cache if available, otherwise it falls back to WorldPop.
You can point the dev server at a compressed, sparse Kontur cache stored elsewhere (e.g. GCS) by
setting one of these environment variables before starting `npm run dev`. The cache is sharded
by R5 parent cell, so the URL/path should be a *folder* containing `{r5CellId}.json.gz` files:

- `KONTUR_H3_CACHE_URL` (base URL; supports `.gz` shards)
- `KONTUR_H3_CACHE_PATH` (base folder for local shards)

#### Build the R5 shard cache

We provide a repeatable script to download the Kontur dataset and build R5 shards:

```bash
cd task-launcher
pip install h3 pyarrow
python scripts/build_kontur_r5_shards.py --download --gzip --output data/kontur-h3-r5
```

This downloads the 400m Kontur population dataset (2023-11-01 snapshot) from Kontur's public
bucket and requires `ogr2ogr` (GDAL) to convert the GeoPackage into Parquet/CSV for streaming.

Task details:

1. [Matrix Reasoning](https://hs-levante-assessment-dev.web.app/?task=matrix-reasoning) [George]
Expand Down
22 changes: 0 additions & 22 deletions task-launcher/.firebase/hosting.ZGlzdA.cache

This file was deleted.

3 changes: 3 additions & 0 deletions task-launcher/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
.venv/
data/
firestore-debug.log
node_modules
dist
**/.Rhistory
Expand Down
6 changes: 3 additions & 3 deletions task-launcher/firebase.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
"emulators": {
"auth": {
"host": "127.0.0.1",
"port": 9199
"port": 9290
},
"firestore": {
"host": "127.0.0.1",
"port": 8180
"port": 8185
},
"functions": {
"host": "127.0.0.1",
"port": 5002
"port": 5005
},
"ui": {
"host": "127.0.0.1",
Expand Down
35 changes: 12 additions & 23 deletions task-launcher/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions task-launcher/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
"@sentry/browser": "^8.7.0",
"cypress-real-events": "^1.13.0",
"fscreen": "^1.2.0",
"h3-js": "^4.4.0",
"i18next": "^22.4.15",
"i18next-browser-languagedetector": "^7.0.1",
"jspsych": "^7.2.1",
Expand Down
229 changes: 229 additions & 0 deletions task-launcher/scripts/build_kontur_r5_shards.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
#!/usr/bin/env python3
"""Build an R5-sharded Kontur H3 population cache.

Streams (h3, population) rows from a Kontur dataset (Parquet or CSV),
aggregates population at several H3 resolutions, and writes one JSON
shard per resolution-5 parent cell into an output folder.
"""
import argparse
import csv
import gzip
import json
import os
import shutil
import subprocess
import sys
import urllib.request
from collections import OrderedDict

# h3 is required for all parent-cell arithmetic; fail fast with an install hint.
try:
    import h3
except ImportError as exc:
    raise SystemExit("Missing dependency: pip install h3") from exc

# pyarrow is optional: only needed to read Parquet input. When unavailable,
# `ds` stays None and the CSV code path is used instead.
try:
    import pyarrow.dataset as ds
except ImportError:
    ds = None


# Gzipped GeoPackage of the Kontur population dataset (400 m hexagons,
# 2023-11-01 snapshot) on Kontur's public S3 bucket.
DEFAULT_DATASET_URL = (
    "https://geodata-eu-central-1-kontur-public.s3.eu-central-1.amazonaws.com/"
    "kontur_datasets/kontur_population_20231101.gpkg.gz"
)


def download_file(url: str, dest_path: str) -> None:
    """Stream *url* to *dest_path*, creating parent directories as needed.

    Args:
        url: Source URL (any scheme urllib supports).
        dest_path: Destination file path; parent folders are created.
    """
    # os.path.dirname() is "" for a bare filename, and makedirs("") raises
    # FileNotFoundError — only create directories when there is a parent.
    parent = os.path.dirname(dest_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with urllib.request.urlopen(url) as response, open(dest_path, "wb") as out_file:
        shutil.copyfileobj(response, out_file)


def gunzip_file(src_path: str, dest_path: str) -> None:
    """Decompress the gzip file at *src_path* into *dest_path*.

    Args:
        src_path: Path to an existing ``.gz`` file.
        dest_path: Output path; parent folders are created as needed.
    """
    # Guard against dirname "" for bare filenames; makedirs("") raises.
    parent = os.path.dirname(dest_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with gzip.open(src_path, "rb") as src, open(dest_path, "wb") as dest:
        shutil.copyfileobj(src, dest)


def ensure_tabular_from_gpkg(gpkg_path: str, parquet_path: str, csv_path: str) -> tuple[str, str]:
    """Return ``(format, path)`` for a tabular copy of *gpkg_path*.

    Reuses an existing Parquet or CSV conversion when one is already on disk.
    Otherwise shells out to ``ogr2ogr`` (GDAL), preferring Parquet when
    pyarrow is importable (so we can read it back) and falling back to CSV
    if the Parquet conversion fails (e.g. GDAL built without that driver).

    Args:
        gpkg_path: Source GeoPackage path.
        parquet_path: Desired Parquet output path.
        csv_path: Desired CSV output path.

    Returns:
        ("parquet", parquet_path) or ("csv", csv_path).

    Raises:
        SystemExit: if conversion is needed but ogr2ogr is not on PATH.
    """
    if os.path.exists(parquet_path):
        return "parquet", parquet_path
    if os.path.exists(csv_path):
        return "csv", csv_path
    ogr2ogr = shutil.which("ogr2ogr")
    if not ogr2ogr:
        raise SystemExit("ogr2ogr not found; install GDAL to convert GPKG.")
    # dirname is "" for bare filenames, and makedirs("") raises.
    parent = os.path.dirname(parquet_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    def _convert(fmt: str, out_path: str) -> None:
        # Select only the two columns the shard builder needs.
        subprocess.check_call(
            [ogr2ogr, "-f", fmt, out_path, gpkg_path, "-select", "h3,population"]
        )

    # Parquet output is only useful if pyarrow is available to read it back.
    if ds is not None:
        try:
            _convert("Parquet", parquet_path)
            return "parquet", parquet_path
        except subprocess.CalledProcessError:
            pass  # fall through to the CSV conversion below
    _convert("CSV", csv_path)
    return "csv", csv_path


def iter_rows_from_parquet(parquet_path: str):
    """Yield ``(h3_id, population)`` pairs from a two-column Parquet file.

    Raises:
        SystemExit: if pyarrow (module-level ``ds``) is not installed.
    """
    if ds is None:
        raise SystemExit("pyarrow not available for parquet parsing.")
    source = ds.dataset(parquet_path, format="parquet")
    for record_batch in source.to_batches(columns=["h3", "population"]):
        cells = record_batch.column(0).to_pylist()
        populations = record_batch.column(1).to_pylist()
        yield from zip(cells, populations)


def iter_rows_from_csv(csv_path: str):
    """Yield ``(h3, population)`` string pairs from a headed CSV file.

    Missing columns yield ``None`` in the corresponding position.
    """
    with open(csv_path, "r", encoding="utf-8") as handle:
        for record in csv.DictReader(handle):
            yield record.get("h3"), record.get("population")


def merge_into(existing: dict, incoming: dict) -> dict:
    """Accumulate *incoming*'s per-cell populations into *existing*.

    Mutates *existing* in place and returns it; cells not yet present
    start from zero.
    """
    for resolution, cell_map in incoming.items():
        bucket = existing.setdefault(resolution, {})
        for cell, population in cell_map.items():
            if cell in bucket:
                bucket[cell] += population
            else:
                bucket[cell] = population
    return existing


def flush_shard(output_dir: str, r5_cell_id: str, shard_data: dict, gzip_output: bool) -> None:
    """Persist *shard_data* to ``{r5_cell_id}.json[.gz]`` in *output_dir*.

    If a shard file already exists, its populations are merged additively
    with the in-memory data before writing, so repeated flushes of the same
    shard accumulate rather than overwrite.
    """
    os.makedirs(output_dir, exist_ok=True)
    suffix = ".json.gz" if gzip_output else ".json"
    output_path = os.path.join(output_dir, r5_cell_id + suffix)
    # gzip.open and the builtin open share the "rt"/"wt" text-mode API.
    opener = gzip.open if gzip_output else open

    existing = {}
    if os.path.exists(output_path):
        with opener(output_path, "rt", encoding="utf-8") as fh:
            existing = json.load(fh)

    merged = merge_into(existing.get("resolutions", {}), shard_data)
    with opener(output_path, "wt", encoding="utf-8") as fh:
        json.dump({"resolutions": merged}, fh, separators=(",", ":"))


def build_shards(
    input_path: str,
    input_format: str,
    output_dir: str,
    resolutions: list[int],
    max_shards: int,
    gzip_output: bool,
) -> None:
    """Aggregate per-cell populations into JSON shards keyed by R5 parent.

    Streams ``(h3_id, population)`` rows from *input_path* (``"parquet"``
    or anything else → CSV), sums populations at each requested resolution,
    and groups results under each cell's resolution-5 ancestor. At most
    *max_shards* shards are buffered in memory; the least-recently-used
    shard is flushed to disk and merged back in if more rows for it arrive.

    NOTE(review): flushes merge additively into existing shard files, so
    re-running against a non-empty *output_dir* double-counts population.
    """
    # LRU buffer of in-progress shards: r5 cell id -> {str(res): {cell: pop}}.
    shard_cache: OrderedDict[str, dict] = OrderedDict()

    def get_shard(r5_cell_id: str) -> dict:
        # Cache hit: mark as most recently used.
        if r5_cell_id in shard_cache:
            shard_cache.move_to_end(r5_cell_id)
            return shard_cache[r5_cell_id]
        # Cache miss at capacity: flush the least-recently-used shard.
        if len(shard_cache) >= max_shards:
            oldest_r5, oldest_data = shard_cache.popitem(last=False)
            flush_shard(output_dir, oldest_r5, oldest_data, gzip_output)
        shard_cache[r5_cell_id] = {str(res): {} for res in resolutions}
        return shard_cache[r5_cell_id]

    if input_format == "parquet":
        row_iter = iter_rows_from_parquet(input_path)
    else:
        row_iter = iter_rows_from_csv(input_path)

    for h3_id, pop in row_iter:
        # Skip rows with missing or non-numeric values.
        if h3_id is None or pop is None:
            continue
        try:
            pop_val = float(pop)
        except (TypeError, ValueError):
            continue
        if pop_val <= 0:
            # Zero/negative population contributes nothing to any shard.
            continue
        try:
            base_resolution = h3.get_resolution(h3_id)
        except Exception:
            # Malformed H3 index: skip the row.
            continue
        try:
            r5_cell = h3.cell_to_parent(h3_id, 5)
        except Exception:
            # e.g. cell is coarser than resolution 5 — cannot shard it.
            continue

        shard = get_shard(r5_cell)
        for res in resolutions:
            if res > base_resolution:
                # Cannot aggregate to a resolution finer than the source cell.
                continue
            try:
                parent = h3.cell_to_parent(h3_id, res)
            except Exception:
                continue
            res_map = shard[str(res)]
            res_map[parent] = res_map.get(parent, 0) + pop_val

    # Flush whatever is still buffered in memory.
    for r5_cell, data in shard_cache.items():
        flush_shard(output_dir, r5_cell, data, gzip_output)


def main() -> None:
    """CLI entry point: resolve the input dataset, then build the shard cache.

    Either ``--download`` fetches and converts the default Kontur dataset,
    or ``--input`` points at an existing Parquet/CSV file (format inferred
    from the file extension).
    """
    parser = argparse.ArgumentParser(description="Build R5-sharded Kontur H3 population cache.")
    # Fix: CSV is accepted too — the old help text claimed Parquet-only.
    parser.add_argument("--input", help="Input Parquet or CSV file with h3,population columns.")
    parser.add_argument("--output", default="data/kontur-h3-r5", help="Output shard directory.")
    parser.add_argument("--resolutions", default="5,6,7", help="Comma-separated resolutions to build.")
    parser.add_argument("--max-shards", type=int, default=64, help="Max in-memory shard count.")
    parser.add_argument("--download", action="store_true", help="Download and convert dataset.")
    parser.add_argument("--gzip", action="store_true", help="Write .json.gz shards.")
    args = parser.parse_args()

    if args.download:
        # Download + extract the Kontur GeoPackage, then convert it into a
        # streamable tabular format (Parquet preferred, CSV fallback).
        raw_dir = os.path.join("data", "kontur", "raw")
        os.makedirs(raw_dir, exist_ok=True)
        gz_path = os.path.join(raw_dir, "kontur_population_20231101.gpkg.gz")
        gpkg_path = os.path.join(raw_dir, "kontur_population_20231101.gpkg")
        parquet_path = os.path.join(raw_dir, "kontur_population_20231101.parquet")
        csv_path = os.path.join(raw_dir, "kontur_population_20231101.csv")
        if not os.path.exists(gz_path):
            print(f"Downloading {DEFAULT_DATASET_URL} ...")
            download_file(DEFAULT_DATASET_URL, gz_path)
        if not os.path.exists(gpkg_path):
            print("Extracting .gpkg.gz ...")
            gunzip_file(gz_path, gpkg_path)
        input_format, input_path = ensure_tabular_from_gpkg(gpkg_path, parquet_path, csv_path)
    else:
        input_path = args.input
        # Infer format from the extension; anything non-Parquet is read as CSV.
        input_format = "parquet" if input_path and input_path.endswith(".parquet") else "csv"

    if not input_path or not os.path.exists(input_path):
        # Fix: the builder accepts Parquet or CSV, so do not claim Parquet-only.
        raise SystemExit("Input file not found. Use --input or --download.")

    resolutions = [int(x.strip()) for x in args.resolutions.split(",") if x.strip()]
    build_shards(
        input_path=input_path,
        input_format=input_format,
        output_dir=args.output,
        resolutions=resolutions,
        max_shards=max(args.max_shards, 1),  # guard against non-positive values
        gzip_output=args.gzip,
    )
    print(f"Shards written to {args.output}")


if __name__ == "__main__":
    main()
Loading