diff --git a/.env.whaletag b/.env.whaletag new file mode 100644 index 0000000..5fb29cf --- /dev/null +++ b/.env.whaletag @@ -0,0 +1,43 @@ +# Whale Tag Simulator Configuration +# Choose ONE network configuration below by uncommenting the relevant section + +# ============================================================================= +# OPTION 1: Bridge Network (Default - Recommended for Most Users) +# ============================================================================= +# Container gets its own IP on Docker's bridge network +# Pro: Works out of the box, no network configuration needed +# Con: Not discoverable by 'ceti whaletag -l' (use container IP directly) +# +# To use: Uncomment the lines below +WHALETAG_HOSTNAME=wt-b827eb123456 +WHALETAG_PASSWORD=ceticeti +NETWORK_MODE=bridge + +# ============================================================================= +# OPTION 2: Host Network (Simple but Port Conflicts) +# ============================================================================= +# Container uses host's network stack directly +# Pro: Simple, discoverable on LAN +# Con: Port 22 conflicts with host SSH server (NOT RECOMMENDED) +# +# To use: Comment out Option 1, uncomment lines below +# WHALETAG_HOSTNAME=wt-b827eb123456 +# WHALETAG_PASSWORD=ceticeti +# NETWORK_MODE=host + +# ============================================================================= +# OPTION 3: MacVLAN Network (Advanced - Real LAN IP via DHCP) +# ============================================================================= +# Container gets a real IP address on your LAN (like a physical device) +# Pro: Discoverable by 'ceti whaletag -l', most realistic simulation +# Con: Requires network interface configuration, host can't directly access container +# +# To use: Comment out Option 1, uncomment and configure lines below +# WHALETAG_HOSTNAME=wt-b827eb123456 +# WHALETAG_PASSWORD=ceticeti +# NETWORK_MODE=macvlan +# +# Find your network interface: ip addr | grep -E '^[0-9]+: 
(eth|wlan|enp)' +# NETWORK_INTERFACE=eth0 + +# Container will get IP via DHCP from your router (no manual IP needed) diff --git a/COMMIT_MESSAGE.txt b/COMMIT_MESSAGE.txt new file mode 100644 index 0000000..ce1eb88 --- /dev/null +++ b/COMMIT_MESSAGE.txt @@ -0,0 +1,46 @@ +Add whale tag simulator for local testing + +Implements a Docker-based whale tag simulator that allows developers to test +`ceti whaletag` commands locally without physical hardware. This enables faster +development iteration and automated testing of whale tag data download workflows. + +Features: +- Alpine Linux container (~12 MB) with SSH server and test data +- Bridge network mode (default) - works out of the box +- Macvlan mode (advanced) - for LAN discovery testing +- Automated pytest integration with custom marker to exclude tests by default +- Sample test files (audio .raw/.flac, sensors CSV) mimicking real whale tag data + +Changes: +- Add Dockerfile.whaletag: Alpine-based image with openssh-server and dhcpcd +- Add docker-compose.whaletag.yml: Container orchestration with network mode support +- Add scripts/init-whaletag.sh: Container initialization (user setup, test data, SSH) +- Add .env.whaletag: Configuration file with three network mode options +- Add tests/test_whaletag_integration.py: 6 integration tests for SSH, SFTP, hostname +- Add pytest.ini: Configure @pytest.mark.whaletag marker to exclude tests by default +- Update Makefile: Add whaletag-up, whaletag-down, whaletag-clean, test-whaletag targets +- Update README.md: Add whale tag simulator quick start guide +- Add docs/TESTING.md: Comprehensive testing documentation + +Usage: + make whaletag-up + CONTAINER_IP=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' wt-b827eb123456) + ceti whaletag -t $CONTAINER_IP + make whaletag-down + +Testing: + make test-whaletag # Automated tests + pytest -m whaletag # Run whale tag tests only + pytest # Default - excludes whale tag tests + +Network modes: +- Bridge 
(default): Container gets Docker IP, accessible from host +- Host: Container shares host network (may conflict with SSH port 22) +- Macvlan (advanced): Container gets real LAN IP via DHCP, discoverable by other machines + +Limitations: +- Network discovery (ceti whaletag -l) only works in macvlan mode +- Bridge mode requires direct connection via IP (ceti whaletag -t ) +- Macvlan mode requires manual network interface configuration + +Related issue: Enables local development and CI/CD testing without physical whale tags diff --git a/Dockerfile.whaletag b/Dockerfile.whaletag new file mode 100644 index 0000000..217d766 --- /dev/null +++ b/Dockerfile.whaletag @@ -0,0 +1,30 @@ +# Whale Tag Mockup Docker Image +# Pre-installs SSH server and utilities for network configuration + +FROM alpine:latest + +# Install dependencies (SSH server, DHCP client for macvlan mode, basic utils) +RUN apk add --no-cache \ + openssh-server \ + dhcpcd \ + bash \ + python3 \ + sudo \ + && \ + # Pre-configure SSH + ssh-keygen -A && \ + sed -i 's/#PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config && \ + sed -i 's/#PasswordAuthentication.*/PasswordAuthentication yes/' /etc/ssh/sshd_config && \ + # Create SSH runtime directory + mkdir -p /run/sshd && \ + # Configure sudo for passwordless access (for ceti whaletag -ct command) + echo "pi ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + +# Create data directory +RUN mkdir -p /data && chmod 755 /data + +COPY scripts/init-whaletag.sh /docker-entrypoint.sh +COPY scripts/generate-syslog.py /generate-syslog.py +RUN chmod +x /docker-entrypoint.sh /generate-syslog.py + +CMD ["/bin/bash", "/docker-entrypoint.sh"] diff --git a/Makefile b/Makefile index 587dbef..b55adec 100644 --- a/Makefile +++ b/Makefile @@ -24,3 +24,41 @@ release: bumpversion publish: build_tools build login_twine @python -m twine upload --repository codeartifact dist/ceti-* + +# Whale Tag Simulator targets +whaletag-up: + @echo "Starting whale tag simulator..." + @set -a && . 
$(CURDIR)/.env.whaletag && set +a && docker-compose -f docker-compose.whaletag.yml up -d 2>&1 + @echo "Waiting for SSH server to start..." + @sleep 3 + @echo "" + @echo "Whale tag simulator is ready!" + @echo " Container: $${WHALETAG_HOSTNAME:-wt-b827eb123456}" + @echo " Hostname: $${WHALETAG_HOSTNAME:-wt-b827eb123456}" + @echo " SSH Port: 22" + @echo " SSH User: pi" + @echo " SSH Password: $${WHALETAG_PASSWORD:-ceticeti}" + @echo "" + @echo "Get container IP:" + @echo " docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' $${WHALETAG_HOSTNAME:-wt-b827eb123456}" + @echo "" + @echo "Test connection:" + @echo " ssh pi@$$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' $${WHALETAG_HOSTNAME:-wt-b827eb123456})" + +whaletag-down: + @echo "Stopping whale tag simulator..." + @docker-compose -f docker-compose.whaletag.yml down 2>&1 + @echo "Whale tag simulator stopped" + +whaletag-clean: + @echo "Cleaning whale tag data..." + @docker-compose -f docker-compose.whaletag.yml down -v 2>&1 + @echo "Whale tag data cleaned" + +test-whaletag: + @echo "Testing whale tag simulator..." + @$(MAKE) whaletag-up + @echo "Running whale tag tests..." 
+ @pytest -m whaletag -v || ($(MAKE) whaletag-down && exit 1) + @$(MAKE) whaletag-down + @echo "Whale tag tests completed" diff --git a/README.md b/README.md index fe5743b..c541c51 100644 --- a/README.md +++ b/README.md @@ -214,3 +214,37 @@ There's also a convemnience script scripts/tag.sh that does the following automa 5) copy the back-up of compressed data to /data-backup folder 6) upload all downloaded and compressed data to s3 7) clean all tags + +### Testing + +#### Whale Tag Simulator + +Test `ceti whaletag` commands locally without physical hardware using a Docker-based simulator: + +```console +# Start whale tag simulator +make whaletag-up + +# Get container IP +CONTAINER_IP=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' wt-b827eb123456) + +# Test ceti whaletag command +ceti whaletag -t $CONTAINER_IP + +# Or run automated tests +make test-whaletag + +# Stop simulator +make whaletag-down +``` + +The simulator creates a lightweight Alpine Linux container (~12 MB) with: +- SSH server on port 22 +- Docker bridge network IP +- User `pi` with password `ceticeti` +- Hostname `wt-b827eb123456` +- Sample test files in `/data/` (audio .raw/.flac, sensors CSV) + +**Note:** Whale tag tests are excluded from default `pytest` runs. Use `make test-whaletag` or `pytest -m whaletag` to run them explicitly. + +See [docs/TESTING.md](docs/TESTING.md) for complete testing documentation. diff --git a/docker-compose.whaletag.yml b/docker-compose.whaletag.yml new file mode 100644 index 0000000..6cd6d10 --- /dev/null +++ b/docker-compose.whaletag.yml @@ -0,0 +1,56 @@ +services: + whaletag-mockup: + build: + context: . 
+ dockerfile: Dockerfile.whaletag + image: ceti-whaletag-mockup:latest + container_name: ${WHALETAG_HOSTNAME:-wt-b827eb123456} + hostname: ${WHALETAG_HOSTNAME:-wt-b827eb123456} + environment: + - WHALETAG_HOSTNAME=${WHALETAG_HOSTNAME:-wt-b827eb123456} + - WHALETAG_PASSWORD=${WHALETAG_PASSWORD:-ceticeti} + - NETWORK_MODE=${NETWORK_MODE:-bridge} + volumes: + - whaletag-data:/data + - whaletag-backup:/backup + - ./scripts/init-whaletag.sh:/docker-entrypoint.sh + command: /bin/sh /docker-entrypoint.sh + # Network configuration depends on NETWORK_MODE in .env.whaletag + # NOTE: You must manually edit this file to switch modes: + # - For 'bridge': uncomment 'networks: default' section + # - For 'host': uncomment 'network_mode: host' section + # - For 'macvlan': uncomment 'networks: macvlan_network' section + + # BRIDGE MODE (Default - works out of the box) + networks: + - default + + # HOST MODE (Uncomment this, comment out 'networks' above) + # network_mode: host + + # MACVLAN MODE (Advanced - for real LAN discovery, uncomment and configure) + # networks: + # - macvlan_network + # # No static IP - container will use DHCP + # dns: + # - 8.8.8.8 # Google DNS + # - 192.168.0.1 # Your router (fallback) + # cap_add: + # - NET_ADMIN # Required for DHCP client to configure network interface + +volumes: + whaletag-data: + whaletag-backup: + +networks: + # Default bridge network (for bridge mode) + default: + driver: bridge + + # MacVLAN network (for macvlan mode) + # Container gets IP via DHCP from your router + macvlan_network: + driver: macvlan + driver_opts: + parent: ${NETWORK_INTERFACE:-eth0} + # No ipam config - uses DHCP diff --git a/docs/TESTING.md b/docs/TESTING.md new file mode 100644 index 0000000..36c0816 --- /dev/null +++ b/docs/TESTING.md @@ -0,0 +1,185 @@ +# Testing Guide + +This guide covers how to test the CETI data ingestion tools locally. 
+ +## Whale Tag Simulator + +The whale tag simulator allows you to test `ceti whaletag` commands locally without physical hardware. + +### Quick Start + +```bash +# Start the simulator +make whaletag-up + +# Run automated tests +make test-whaletag + +# Stop the simulator +make whaletag-down +``` + +### Manual Testing + +```bash +# Start simulator +make whaletag-up + +# Get container IP +CONTAINER_IP=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' wt-b827eb123456) + +# Test SSH connection +ssh pi@$CONTAINER_IP +# Password: ceticeti + +# Inside the container, check test data +ls -la /data + +# Exit SSH +exit + +# Test with ceti whaletag command +ceti whaletag -t $CONTAINER_IP + +# Stop simulator +make whaletag-down +``` + +### What Gets Simulated + +The simulator creates a Docker container with: +- **Base Image**: Alpine Linux (~12 MB) +- **Network**: Docker bridge network (container gets its own IP) +- **SSH Server**: OpenSSH on port 22 +- **User**: `pi` with password `ceticeti` +- **Hostname**: `wt-b827eb123456` (configurable via `.env.whaletag`) +- **Test Data**: Sample files in `/data/`: + - Audio files (`.raw`, `.flac`) + - Sensor data (CSV files) + +### Configuration + +Edit `.env.whaletag` to customize: + +```bash +# Hostname of the simulated whale tag +WHALETAG_HOSTNAME=wt-b827eb123456 + +# SSH password for user 'pi' +WHALETAG_PASSWORD=ceticeti + +# Network mode (bridge, host, or macvlan) +NETWORK_MODE=bridge +``` + +**Network Modes:** +- **bridge** (default): Container gets Docker bridge IP, accessible from host via container IP +- **host**: Container shares host network (may conflict with host SSH on port 22) +- **macvlan** (advanced): Container gets real LAN IP via DHCP, discoverable by `ceti whaletag -l` from other machines + +### Running Tests + +**Default pytest (excludes whale tag tests):** +```bash +pytest # Whale tag tests are NOT run +``` + +**Run only whale tag tests:** +```bash +pytest -m whaletag +``` + +**Run all 
tests including whale tag:** +```bash +pytest -m "" +``` + +**Automated test workflow:** +```bash +make test-whaletag # Starts simulator → runs tests → stops simulator +``` + +### Test Coverage + +The whale tag integration tests verify: +- SSH connection to simulated tag +- Hostname pattern validation (`wt-*`) +- `/data` directory exists +- Test files present in `/data` +- SFTP file download +- User permissions for `pi` user + +### Limitations + +**What the simulator CAN test:** +- SSH/SFTP connectivity +- File download workflows (`ceti whaletag -t `) +- Hostname validation +- Data cleaning operations + +**What the simulator CANNOT test (bridge mode):** +- Network discovery (`ceti whaletag -l` won't find containers on Docker bridge network) +- Real sensor data capture +- Actual whale tag firmware behavior +- Hardware-specific features + +**Note:** In bridge mode (default), you must use `ceti whaletag -t ` to connect directly. The `-l` discovery flag scans your LAN and won't find Docker bridge containers. For LAN discovery testing, use macvlan mode (see `.env.whaletag` configuration). 
+ +### Troubleshooting + +**Simulator won't start:** +```bash +# Check Docker is running +docker ps + +# View simulator logs +docker logs wt-b827eb123456 +``` + +**Cannot connect via SSH:** +```bash +# Ensure simulator is running +docker ps | grep wt-b827eb123456 + +# Get container IP +docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' wt-b827eb123456 + +# Check SSH server status inside container +docker exec wt-b827eb123456 ps aux | grep sshd +``` + +**Clean start:** +```bash +# Remove all simulator data and restart +make whaletag-clean +make whaletag-up +``` + +### Advanced Usage + +**Access simulator shell:** +```bash +docker exec -it wt-b827eb123456 /bin/bash +``` + +**View container logs:** +```bash +docker logs wt-b827eb123456 +``` + +**Inspect test data:** +```bash +docker exec wt-b827eb123456 ls -la /data +``` + +**Test whale tag commands:** +```bash +# Get container IP +CONTAINER_IP=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' wt-b827eb123456) + +# Download data from tag +ceti whaletag -t $CONTAINER_IP + +# Clean tag (destructive - removes all /data files) +ceti whaletag -ct $CONTAINER_IP +``` diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..3b388a5 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,6 @@ +[pytest] +markers = + whaletag: tests requiring whale tag hardware or simulator (deselect with '-m "not whaletag"') + +# By default, exclude whale tag tests unless explicitly requested +addopts = -m "not whaletag" diff --git a/scripts/generate-syslog.py b/scripts/generate-syslog.py new file mode 100644 index 0000000..a63fcfc --- /dev/null +++ b/scripts/generate-syslog.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +""" +Fast syslog generator for whale tag mockup +Generates 800k realistic log lines in ~2 seconds +""" + +import sys + +def generate_syslog(num_lines=800000): + """Generate realistic syslog entries""" + + # Log message templates + templates = [ + "cetiTagApp[1234]: Audio 
buffer written: {} samples", + "cetiTagApp[1234]: IMU data logged: {} records", + "cetiTagApp[1234]: Battery: {}%", + "systemd[1]: Started session {}", + "kernel: [{}.000000] random: crng init done", + "cetiTagApp[1234]: Pressure: {} Pa", + "cetiTagApp[1234]: Temperature: {}C", + "sshd[5678]: Accepted publickey for pi from 192.168.1.{}", + "cetiTagApp[1234]: State transition: RECORDING", + "cetiTagApp[1234]: Free space: {} GB" + ] + + # Header + print("Jan 4 00:00:00 wt-b827eb123456 cetiTagApp[1234]: System initialization") + print("Jan 4 00:00:01 wt-b827eb123456 kernel: [ 0.000000] Booting Linux on physical CPU 0x0") + + # Generate log lines + for i in range(1, num_lines + 1): + hour = (i // 120000) % 24 + minute = (i // 2000) % 60 + second = (i // 33) % 60 + + template_idx = i % 10 + template = templates[template_idx] + + # Format message with dynamic values + if template_idx == 0: + msg = template.format(i) + elif template_idx == 1: + msg = template.format(i) + elif template_idx == 2: + msg = template.format(max(50, 95 - i // 10000)) + elif template_idx == 3: + msg = template.format(i // 1000) + elif template_idx == 4: + msg = template.format(i // 100) + elif template_idx == 5: + msg = template.format(101325 + (i * 137) % 50000) + elif template_idx == 6: + msg = template.format(18 + (i * 7) % 10) + elif template_idx == 7: + msg = template.format((i * 23) % 255) + elif template_idx == 8: + msg = "cetiTagApp[1234]: State transition: RECORDING" + elif template_idx == 9: + msg = template.format(max(10, 50 - i // 20000)) + + print(f"Jan 4 {hour:02d}:{minute:02d}:{second:02d} wt-b827eb123456 {msg}") + +if __name__ == "__main__": + num_lines = int(sys.argv[1]) if len(sys.argv) > 1 else 800000 + generate_syslog(num_lines) diff --git a/scripts/init-whaletag.sh b/scripts/init-whaletag.sh new file mode 100755 index 0000000..5760760 --- /dev/null +++ b/scripts/init-whaletag.sh @@ -0,0 +1,437 @@ +#!/bin/bash +# Whale Tag Mockup initialization script +# Generates realistic 
whale tag data structure for testing +# SSH server and DHCP client are pre-installed in the Docker image + +set -e + +echo "Initializing whale tag mockup..." + +# Run DHCP client to get IP from router (macvlan mode only) +# In bridge/host mode, skip DHCP and use Docker-assigned IP +if [ "$NETWORK_MODE" = "macvlan" ]; then + # Macvlan mode - request DHCP lease from router + echo "Macvlan mode - requesting DHCP lease..." + if timeout 5 dhcpcd -1 eth0 2>&1 | grep -q "leased"; then + echo "DHCP lease obtained: $(ip addr show eth0 | grep 'inet ' | awk '{print $2}' | head -1)" + else + echo "DHCP request failed (using Docker-assigned IP)" + fi +else + # Bridge or host mode - skip DHCP + echo "Bridge mode - using Docker-assigned IP" +fi + +# Create user pi with password ceticeti (if not exists) +if ! id pi > /dev/null 2>&1; then + adduser -D -s /bin/bash pi + echo "pi:${WHALETAG_PASSWORD:-ceticeti}" | chpasswd +fi + +# Ensure /data directory ownership +chown pi:pi /data + +# Check if data already exists (for faster restarts) +if [ -f /data/data_battery.csv ] && [ -f /data/logs/syslog ]; then + echo "Existing whale tag data found - skipping generation" + echo "To regenerate data, remove the Docker volume:" + echo " docker volume rm data-ingest_whaletag-data data-ingest_whaletag-backup" +elif [ -f /backup/mockup-data.tar.gz ]; then + echo "Data was cleaned - restoring from backup..." + echo "(Extracting ~500MB from compressed backup, ~10 seconds)" + cd /data + tar -xzf /backup/mockup-data.tar.gz + chown -R pi:pi /data + echo "✓ Data restored successfully!" +else + echo "Generating realistic whale tag data files..." 
+ echo "(This takes ~5 minutes on first run, but persists across container restarts)" + + # Base timestamp: Jan 4, 2024 00:00:00 UTC + BASE_EPOCH_S=1704384000 + BASE_EPOCH_MS=1704384000000 + + # Switch to pi user for file generation (using bash not sh) + su - pi -s /bin/bash <<'EOSU' +cd /data + +# Base timestamps +BASE_EPOCH_S=1704384000 +BASE_EPOCH_MS=1704384000000 + +#============================================================================= +# AUDIO FILES (~200MB total) +#============================================================================= +echo "Creating audio files..." + +# FLAC files (5 files x 20MB = 100MB) +for i in 0 1 2 3 4; do + epoch_ms=$((BASE_EPOCH_MS + i * 3600000)) # 1 hour apart + dd if=/dev/urandom of="${epoch_ms}.flac" bs=1M count=20 2>/dev/null +done + +# RAW files (3 files x 15MB = 45MB) +for i in 5 6 7; do + epoch_ms=$((BASE_EPOCH_MS + i * 3600000)) + dd if=/dev/urandom of="${epoch_ms}.raw" bs=1M count=15 2>/dev/null +done + +#============================================================================= +# SINGLE CSV FILES (~10MB total) +#============================================================================= +echo "Creating single CSV files..." 
+ +# data_battery.csv (1MB) +{ + echo "Timestamp_us,Voltage_mV,Current_mA,Temperature_C,StateOfCharge_pct,RemainingCapacity_mAh" + for i in $(seq 1 20000); do + ts=$((BASE_EPOCH_S * 1000000 + i * 5000000)) + voltage=$((4100 + RANDOM % 100)) + current=$((500 + RANDOM % 200)) + temp=$((20 + RANDOM % 15)) + soc=$((95 - i / 200)) + cap=$((3000 - i / 10)) + echo "$ts,$voltage,$current,$temp,$soc,$cap" + done +} > data_battery.csv + +# data_light.csv (500KB) +{ + echo "Timestamp_us,Light_lux" + for i in $(seq 1 15000); do + ts=$((BASE_EPOCH_S * 1000000 + i * 10000000)) + lux=$((100 + RANDOM % 50000)) + echo "$ts,$lux" + done +} > data_light.csv + +# data_pressure_temperature.csv (1MB) +{ + echo "Timestamp_us,Pressure_Pa,Temperature_C" + for i in $(seq 1 25000); do + ts=$((BASE_EPOCH_S * 1000000 + i * 4000000)) + pressure=$((101325 + RANDOM % 50000)) + temp=$((18 + RANDOM % 5)) + echo "$ts,$pressure,$temp" + done +} > data_pressure_temperature.csv + +# data_gps.csv (500KB) +{ + echo "Timestamp_us,Latitude,Longitude,Altitude_m,Speed_mps,Satellites,HDOP" + for i in $(seq 1 10000); do + ts=$((BASE_EPOCH_S * 1000000 + i * 15000000)) + # Dominica is around 15.3N, -61.4W + lat="15.$((3000 + i % 10000))" + lon="-61.$((4000 + i % 10000))" + alt=$((0 + RANDOM % 100)) + speed=$((RANDOM % 10)) + sats=$((8 + RANDOM % 4)) + hdop="0.$((80 + RANDOM % 100))" + echo "$ts,$lat,$lon,$alt,$speed,$sats,$hdop" + done +} > data_gps.csv + +# data_state.csv (100KB) +{ + echo "Timestamp_us,State,Notes" + states=("IDLE" "RECORDING" "SURFACING" "TRANSMITTING" "SLEEPING") + for i in $(seq 1 2000); do + ts=$((BASE_EPOCH_S * 1000000 + i * 60000000)) + state=${states[$((RANDOM % 5))]} + echo "$ts,$state," + done +} > data_state.csv + +# data_audio_status.csv (1MB) +{ + echo "Timestamp [us],RTC Count,Notes,Overflow,Overflow Detection Location,Start Writing,Done Writing,See SPI Block" + for i in $(seq 1 25000); do + ts=$((BASE_EPOCH_S * 1000000 + i * 5000000)) + rtc=$((i * 192000)) + overflow=$((RANDOM % 
100 < 1 ? 1 : 0)) + echo "$ts,$rtc,,$overflow,,,," + done +} > data_audio_status.csv + +# data_systemMonitor.csv (2MB) +{ + echo "CPU all [%],CPU 0 [%],CPU 1 [%],CPU 2 [%],CPU 3 [%],RAM used [MB],RAM free [MB],Swap used [MB],Data partition used [GB],Data partition free [GB],CPU temp [C],GPU temp [C]" + for i in $(seq 1 35000); do + cpu_all=$((20 + RANDOM % 60)) + cpu0=$((10 + RANDOM % 70)) + cpu1=$((10 + RANDOM % 70)) + cpu2=$((10 + RANDOM % 70)) + cpu3=$((10 + RANDOM % 70)) + ram_used=$((200 + RANDOM % 300)) + ram_free=$((200 + RANDOM % 200)) + swap=$((RANDOM % 50)) + disk_used=$((10 + i / 3500)) + disk_free=$((50 - i / 3500)) + temp_cpu=$((45 + RANDOM % 20)) + temp_gpu=$((40 + RANDOM % 25)) + echo "$cpu_all,$cpu0,$cpu1,$cpu2,$cpu3,$ram_used,$ram_free,$swap,$disk_used,$disk_free,$temp_cpu,$temp_gpu" + done +} > data_systemMonitor.csv + +# burnwire_timeout_start_time_s.csv (10KB) +{ + echo "Timestamp_us,Burnwire_timeout_start_time_s" + echo "$((BASE_EPOCH_S * 1000000)),$((BASE_EPOCH_S + 86400))" +} > burnwire_timeout_start_time_s.csv + +#============================================================================= +# MULTI-FILE CSV DATASETS WITH COUNTERS (~250MB total) +#============================================================================= +echo "Creating multi-file CSV datasets..." + +# IMU quaternion files (2 files x 25MB = 50MB) - ETA: ~30sec per file +for file_idx in 0 1; do + echo " Creating data_imu_quat_$(printf '%02d' $file_idx).csv (file $((file_idx + 1))/2)..." 
+ { + echo "Capture_Timestamp_us,Read_Timestamp_us,RTC Count,Notes,Quat_i,Quat_j,Quat_k,Quat_Re,Quat_accuracy" + for i in $(seq 1 500000); do + cap_ts=$((BASE_EPOCH_S * 1000000 + i * 1000 + file_idx * 500000000)) + read_ts=$((cap_ts + 100)) + rtc=$((i * 20)) + # Simplified quaternion values (normalized random values) + qi="0.$((RANDOM % 1000000))" + qj="0.$((RANDOM % 1000000))" + qk="0.$((RANDOM % 1000000))" + qr="0.$((RANDOM % 1000000))" + acc=$((RANDOM % 3)) + echo "$cap_ts,$read_ts,$rtc,,$qi,$qj,$qk,$qr,$acc" + done + } > "data_imu_quat_$(printf '%02d' $file_idx).csv" + echo " ✓ Completed data_imu_quat_$(printf '%02d' $file_idx).csv" +done + +# IMU accelerometer files (2 files x 25MB = 50MB) - ETA: ~1min per file +for file_idx in 0 1; do + echo " Creating data_imu_accel_$(printf '%02d' $file_idx).csv (file $((file_idx + 1))/2)..." + { + echo "Capture_Timestamp_us,Read_Timestamp_us,RTC Count,Notes,Accel_x_raw,Accel_y_raw,Accel_z_raw,Accel_status" + for i in $(seq 1 500000); do + cap_ts=$((BASE_EPOCH_S * 1000000 + i * 400 + file_idx * 200000000)) + read_ts=$((cap_ts + 50)) + rtc=$((i * 50)) + ax=$((RANDOM % 4096 - 2048)) + ay=$((RANDOM % 4096 - 2048)) + az=$((RANDOM % 4096 - 2048)) + status=0 + echo "$cap_ts,$read_ts,$rtc,,$ax,$ay,$az,$status" + done + } > "data_imu_accel_$(printf '%02d' $file_idx).csv" + echo " ✓ Completed data_imu_accel_$(printf '%02d' $file_idx).csv" +done + +# IMU gyroscope file (1 file x 25MB) - ETA: ~1min +echo " Creating data_imu_gyro_00.csv..." 
+{ + echo "Capture_Timestamp_us,Read_Timestamp_us,RTC Count,Notes,Gyro_x_raw,Gyro_y_raw,Gyro_z_raw,Gyro_status" + for i in $(seq 1 500000); do + cap_ts=$((BASE_EPOCH_S * 1000000 + i * 400)) + read_ts=$((cap_ts + 50)) + rtc=$((i * 50)) + gx=$((RANDOM % 4096 - 2048)) + gy=$((RANDOM % 4096 - 2048)) + gz=$((RANDOM % 4096 - 2048)) + status=0 + echo "$cap_ts,$read_ts,$rtc,,$gx,$gy,$gz,$status" + done +} > data_imu_gyro_00.csv +echo " ✓ Completed data_imu_gyro_00.csv" + +# IMU magnetometer file (1 file x 25MB) - ETA: ~1min +echo " Creating data_imu_mag_00.csv..." +{ + echo "Capture_Timestamp_us,Read_Timestamp_us,RTC Count,Notes,Mag_x_raw,Mag_y_raw,Mag_z_raw,Mag_status" + for i in $(seq 1 500000); do + cap_ts=$((BASE_EPOCH_S * 1000000 + i * 400)) + read_ts=$((cap_ts + 50)) + rtc=$((i * 50)) + mx=$((RANDOM % 4096 - 2048)) + my=$((RANDOM % 4096 - 2048)) + mz=$((RANDOM % 4096 - 2048)) + status=0 + echo "$cap_ts,$read_ts,$rtc,,$mx,$my,$mz,$status" + done +} > data_imu_mag_00.csv +echo " ✓ Completed data_imu_mag_00.csv" + +# ECG files (2 files x 25MB = 50MB) - ETA: ~2min per file +for file_idx in 0 1; do + echo " Creating data_ecg_$(printf '%02d' $file_idx).csv (file $((file_idx + 1))/2)..." + { + echo "Sample Index,ECG,Leads-Off-P,Leads-Off-N" + for i in $(seq 1 1000000); do + idx=$((i + file_idx * 1000000)) + ecg=$((32768 + RANDOM % 1000 - 500)) + lop=0 + lon=0 + echo "$idx,$ecg,$lop,$lon" + done + } > "data_ecg_$(printf '%02d' $file_idx).csv" + echo " ✓ Completed data_ecg_$(printf '%02d' $file_idx).csv" +done + +#============================================================================= +# METADATA FILES (~1MB total) +#============================================================================= +echo "Creating metadata files..." 
+ +# data_config_.txt +cat > "data_config_${BASE_EPOCH_S}.txt" <.yaml +cat > "data_tag_info_${BASE_EPOCH_S}.yaml" < logs/syslog +echo " ✓ Completed logs/syslog" + +#============================================================================= +# FALSE POSITIVES - Directories that should NOT be downloaded +#============================================================================= +echo "Creating system directories (false positives)..." + +# swap directory with swapfile (should be excluded from download) +mkdir -p swap +dd if=/dev/zero of=swap/swapfile bs=1M count=10 2>/dev/null +chmod 600 swap/swapfile + +# lost+found directory (filesystem recovery, should be excluded) +mkdir -p lost+found +chmod 700 lost+found +# Add some fake recovered file fragments +echo "corrupted data fragment" > lost+found/#12345 +echo "partial file recovery" > lost+found/#67890 + +echo "" +echo "=========================================" +echo "Data generation complete!" +echo "=========================================" +echo "" +echo "Summary:" +echo " Audio files: 8 files (~155MB)" +echo " CSV files: 8 single + 10 multi-file (~60MB + ~250MB)" +echo " Metadata: 2 files (~1MB)" +echo " Logs: 1 file (~40MB)" +echo " System dirs: swap/, lost+found/" +echo "" +echo "Total size: ~510MB" +echo "" +EOSU + +# Hostname is already set by Docker Compose +# No need to set it again (and would require privileged mode) + +echo "=========================================" +echo "Whale tag mockup initialized!" +echo "=========================================" +echo "Hostname: $(hostname)" +echo "SSH user: pi" +echo "SSH password: ${WHALETAG_PASSWORD:-ceticeti}" +echo "Data directory: /data" +echo "" +echo "File structure:" +su - pi -c "ls -lh /data | head -25" +echo "..." +echo "" +echo "Total size:" +su - pi -c "du -sh /data" +echo "" + +# Create compressed backup for quick restoration after testing cleanup +if [ ! 
-f /backup/mockup-data.tar.gz ]; then + echo "" + echo "Creating backup for quick restoration after cleanup tests..." + cd /data + tar -czf /backup/mockup-data.tar.gz \ + --exclude='lost+found' \ + *.flac *.raw *.csv *.txt *.yaml logs/ swap/ 2>/dev/null || true + chmod 600 /backup/mockup-data.tar.gz + echo "✓ Backup created in separate volume (not in /data)" + du -sh /backup/mockup-data.tar.gz +fi + +echo "" +echo "=========================================" +echo "Ready for testing!" +echo "=========================================" +echo "" +echo "Test workflow:" +echo " 1. Download: ceti whaletag -t " +echo " 2. Clean: ceti whaletag -ct " +echo " 3. Restart: docker restart wt-b827eb123456" +echo " 4. → Data auto-restores from backup in ~10 seconds" +fi + +# Start SSH server in foreground +echo "Starting SSH server..." +exec /usr/sbin/sshd -D diff --git a/tests/test_whaletag_integration.py b/tests/test_whaletag_integration.py new file mode 100644 index 0000000..a0cbd4c --- /dev/null +++ b/tests/test_whaletag_integration.py @@ -0,0 +1,396 @@ +""" +Whale Tag Integration Tests + +These tests require a whale tag simulator or real hardware to be available. +They are excluded from default pytest runs via pytest markers. 
+ +To run these tests: + make test-whaletag # Starts simulator, runs tests, stops simulator + pytest -m whaletag # Run with existing simulator/hardware +""" + +import os +import pytest +import paramiko + + +# Whale tag connection parameters +# For simulator: get IP with docker inspect +WHALETAG_HOSTNAME = os.getenv('WHALETAG_HOSTNAME', 'wt-b827eb123456') +WHALETAG_HOST = os.getenv('WHALETAG_HOST') # If not set, will auto-detect from Docker +WHALETAG_PORT = int(os.getenv('WHALETAG_PORT', '22')) # Standard SSH port +WHALETAG_USERNAME = 'pi' +WHALETAG_PASSWORD = os.getenv('WHALETAG_PASSWORD', 'ceticeti') +WHALETAG_HOSTNAME_PATTERN = r'^wt-[a-z0-9]{6,}$' + + +def get_whaletag_ip(): + """Get IP address of whale tag simulator container""" + if WHALETAG_HOST: + return WHALETAG_HOST + + # Auto-detect container IP + import subprocess + try: + result = subprocess.run( + ['docker', 'inspect', '-f', + '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}', + WHALETAG_HOSTNAME], + capture_output=True, text=True, check=True + ) + ip = result.stdout.strip() + if ip: + return ip + except subprocess.CalledProcessError: + pass + + raise RuntimeError( + f"Cannot find whale tag simulator. " + f"Set WHALETAG_HOST or ensure container '{WHALETAG_HOSTNAME}' is running." 
+ ) + + +@pytest.fixture +def ssh_client(): + """Create SSH client connection to whale tag""" + host = get_whaletag_ip() + client = paramiko.SSHClient() + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + client.connect( + hostname=host, + port=WHALETAG_PORT, + username=WHALETAG_USERNAME, + password=WHALETAG_PASSWORD, + timeout=10 + ) + yield client + client.close() + + +@pytest.mark.whaletag +def test_ssh_connection(ssh_client): + """Test SSH connection to whale tag""" + stdin, stdout, stderr = ssh_client.exec_command('echo "test"') + output = stdout.read().decode().strip() + assert output == 'test', "SSH command execution failed" + + +@pytest.mark.whaletag +def test_hostname_pattern(ssh_client): + """Test that hostname matches wt-* pattern""" + import re + stdin, stdout, stderr = ssh_client.exec_command('hostname') + hostname = stdout.read().decode().strip() + assert re.match(WHALETAG_HOSTNAME_PATTERN, hostname), \ + f"Hostname '{hostname}' does not match pattern '{WHALETAG_HOSTNAME_PATTERN}'" + + +@pytest.mark.whaletag +def test_data_directory_exists(ssh_client): + """Test that /data directory exists""" + stdin, stdout, stderr = ssh_client.exec_command('test -d /data && echo "exists"') + output = stdout.read().decode().strip() + assert output == 'exists', "/data directory does not exist" + + +@pytest.mark.whaletag +def test_data_directory_has_files(ssh_client): + """Test that /data directory contains realistic whale tag files""" + stdin, stdout, stderr = ssh_client.exec_command('ls -1 /data') + files = stdout.read().decode().strip().split('\n') + assert len(files) > 0, "/data directory is empty" + + # Check for realistic whale tag file patterns + file_str = ' '.join(files) + + # Audio files: .flac or .raw + assert any(f.endswith('.flac') or f.endswith('.raw') for f in files), \ + "No audio files (.flac/.raw) found in /data" + + # CSV sensor data files + assert any(f.startswith('data_') and f.endswith('.csv') for f in files), \ + "No CSV sensor files 
(data_*.csv) found in /data" + + # Metadata files + assert any(f.startswith('data_config_') and f.endswith('.txt') for f in files), \ + "No config metadata files found in /data" + + # Logs directory + assert 'logs' in files, "logs/ directory not found in /data" + + # False positives (should exist but not be downloaded) + assert 'swap' in files, "swap/ directory not found (false positive test)" + assert 'lost+found' in files, "lost+found/ directory not found (false positive test)" + + +@pytest.mark.whaletag +def test_sftp_download(ssh_client): + """Test SFTP file download from whale tag""" + import tempfile + from pathlib import Path + + sftp = ssh_client.open_sftp() + + # List files in /data + files = sftp.listdir('/data') + assert len(files) > 0, "/data directory is empty" + + # Download a realistic audio file (should be large) + audio_files = [f for f in files if f.endswith('.flac') or f.endswith('.raw')] + assert len(audio_files) > 0, "No audio files to download" + + remote_file = f'/data/{audio_files[0]}' + + with tempfile.TemporaryDirectory() as tmpdir: + local_file = Path(tmpdir) / audio_files[0] + sftp.get(remote_file, str(local_file)) + assert local_file.exists(), f"Failed to download {remote_file}" + assert local_file.stat().st_size > 1024 * 1024, \ + f"Downloaded file too small: {local_file.stat().st_size} bytes (expected >1MB)" + + sftp.close() + + +@pytest.mark.whaletag +def test_sftp_recursive_directory_listing(ssh_client): + """Test SFTP can list directories recursively (logs/, swap/, etc.)""" + sftp = ssh_client.open_sftp() + + # List /data directory + files = sftp.listdir('/data') + assert 'logs' in files, "logs/ directory not found in SFTP listing" + + # List logs/ subdirectory + log_files = sftp.listdir('/data/logs') + assert 'syslog' in log_files, "syslog not found in /data/logs" + + # Verify we can stat the syslog file + stat = sftp.stat('/data/logs/syslog') + assert stat.st_size > 1024 * 1024, "syslog file should be >1MB" + + sftp.close() + + 
@pytest.mark.whaletag
def test_sftp_download_csv_file(ssh_client):
    """Test SFTP download of CSV sensor data files"""
    import tempfile
    from pathlib import Path

    sftp = ssh_client.open_sftp()
    # try/finally so the SFTP channel is closed even when an assertion
    # fails mid-test (the old version leaked the channel on failure).
    try:
        # Find a CSV file
        files = sftp.listdir('/data')
        csv_files = [f for f in files if f.endswith('.csv')]
        assert len(csv_files) > 0, "No CSV files found"

        remote_file = f'/data/{csv_files[0]}'

        with tempfile.TemporaryDirectory() as tmpdir:
            local_file = Path(tmpdir) / csv_files[0]
            sftp.get(remote_file, str(local_file))

            assert local_file.exists(), f"Failed to download {remote_file}"

            # Verify it's a valid CSV by checking first line has comma-separated values
            with open(local_file) as f:
                first_line = f.readline()
            assert ',' in first_line, "Downloaded CSV file has invalid format"
    finally:
        sftp.close()


@pytest.mark.whaletag
def test_user_permissions(ssh_client):
    """Test that user pi can access /data directory"""
    stdin, stdout, stderr = ssh_client.exec_command('whoami')
    user = stdout.read().decode().strip()
    assert user == 'pi', f"Expected user 'pi', got '{user}'"

    # Test write permission by creating and removing a scratch file.
    stdin, stdout, stderr = ssh_client.exec_command('touch /data/.test && rm /data/.test && echo "success"')
    output = stdout.read().decode().strip()
    assert output == 'success', "User pi does not have write permission to /data"


@pytest.mark.whaletag
def test_file_ownership(ssh_client):
    """Test that files in /data are owned by pi:pi"""
    stdin, stdout, stderr = ssh_client.exec_command('ls -la /data/*.flac /data/*.csv 2>/dev/null | head -5')
    output = stdout.read().decode()

    # Parse ls -la output and check ownership of every listed file.
    checked = 0
    for line in output.strip().split('\n'):
        if not line or line.startswith('total'):
            continue
        parts = line.split()
        # Need at least 4 columns to index owner (parts[2]) and group
        # (parts[3]); the old `>= 3` guard raised IndexError on parts[3].
        if len(parts) >= 4:
            owner = parts[2]  # 3rd column is owner
            group = parts[3]  # 4th column is group
            assert owner == 'pi', f"File not owned by pi: {line}"
            assert group == 'pi', f"File not in pi group: {line}"
            checked += 1

    # Guard against a vacuous pass: if the glob matched nothing, the loop
    # above never ran and the test previously succeeded without checking.
    assert checked > 0, "No .flac/.csv files found in /data to check ownership of"


@pytest.mark.whaletag
def test_sudo_permissions(ssh_client):
    """Test that user pi has passwordless sudo access"""
    # Test sudo without password (required for ceti whaletag -ct cleanup);
    # -n makes sudo fail instead of prompting if a password is required.
    stdin, stdout, stderr = ssh_client.exec_command('sudo -n whoami')
    output = stdout.read().decode().strip()
    error = stderr.read().decode().strip()

    assert output == 'root', f"sudo failed: {error}"
    assert 'password' not in error.lower(), "sudo requires password (should be passwordless)"


@pytest.mark.whaletag
def test_false_positives_exist(ssh_client):
    """Test that false positive directories exist (swap, lost+found)"""
    # These directories should exist but should NOT be downloaded by ceti whaletag
    stdin, stdout, stderr = ssh_client.exec_command('test -d /data/swap && echo "swap_exists"')
    output = stdout.read().decode().strip()
    assert output == 'swap_exists', "swap/ directory should exist as false positive"

    stdin, stdout, stderr = ssh_client.exec_command('test -d /data/lost+found && echo "lost_exists"')
    output = stdout.read().decode().strip()
    assert output == 'lost_exists', "lost+found/ directory should exist as false positive"


@pytest.mark.whaletag
def test_realistic_file_sizes(ssh_client):
    """Test that files have realistic sizes (not empty placeholders)"""
    # Check audio files have realistic size (>10MB)
    stdin, stdout, stderr = ssh_client.exec_command('ls -lh /data/*.flac /data/*.raw 2>/dev/null | head -3')
    output = stdout.read().decode()

    sizes = []
    for line in output.strip().split('\n'):
        if not line or line.startswith('total'):
            continue
        parts = line.split()
        if len(parts) >= 5:
            size_str = parts[4]  # 5th column is size (human-readable, e.g. "12.3M")
            sizes.append(size_str)

    assert len(sizes) > 0, "No audio files found"

    # Coarse check on the human-readable unit suffix: any M- or G-sized
    # file passes (so e.g. "2M" would too; this only rejects byte/KB files).
    large_files = [s for s in sizes if 'M' in s or 'G' in s]
    assert len(large_files) > 0, f"Audio files should be >10MB, got: {sizes}"


@pytest.mark.whaletag
def test_multi_file_csv_datasets(ssh_client):
    """Test that multi-file CSV datasets exist with counter suffixes"""
    # Check for IMU files with _00, _01 suffixes
    stdin, stdout, stderr = ssh_client.exec_command('ls /data/data_imu_*_00.csv 2>/dev/null | wc -l')
    count = int(stdout.read().decode().strip())
    assert count > 0, "No multi-file IMU datasets found (data_imu_*_00.csv)"

    # Check for ECG files with counter
    stdin, stdout, stderr = ssh_client.exec_command('ls /data/data_ecg_*.csv 2>/dev/null | wc -l')
    count = int(stdout.read().decode().strip())
    assert count > 0, "No ECG datasets found (data_ecg_*.csv)"


@pytest.mark.whaletag
def test_logs_directory_with_syslog(ssh_client):
    """Test that logs directory contains syslog file"""
    stdin, stdout, stderr = ssh_client.exec_command('test -f /data/logs/syslog && echo "exists"')
    output = stdout.read().decode().strip()
    assert output == 'exists', "logs/syslog file not found"

    # Check syslog has realistic size (should be large - 800k lines)
    stdin, stdout, stderr = ssh_client.exec_command('wc -l /data/logs/syslog')
    line_count = int(stdout.read().decode().split()[0])
    assert line_count > 100000, f"syslog should have >100k lines, got {line_count}"


@pytest.mark.whaletag
def test_ssh_server_running(ssh_client):
    """Test that SSH server is running and accessible"""
    # Check sshd process is running
    stdin, stdout, stderr = ssh_client.exec_command('ps aux | grep sshd | grep -v grep')
    output = stdout.read().decode()
    assert 'sshd' in output, "SSH server (sshd) is not running"

    # Verify we can execute commands (already proven by ssh_client fixture working)
    stdin, stdout, stderr = ssh_client.exec_command('echo "ssh_works"')
    result = stdout.read().decode().strip()
    assert result == 'ssh_works', "SSH command execution failed"


@pytest.mark.whaletag
def test_backup_exists(ssh_client):
    """Test that backup archive exists in /backup volume"""
    stdin, stdout, stderr = ssh_client.exec_command('test -f /backup/mockup-data.tar.gz && echo "exists"')
    output = stdout.read().decode().strip()
    assert output == 'exists', "Backup file /backup/mockup-data.tar.gz not found"

    # Check backup size is reasonable (should be ~200MB compressed).
    # Coarse unit-suffix check: any M- or G-sized archive passes.
    stdin, stdout, stderr = ssh_client.exec_command('du -h /backup/mockup-data.tar.gz')
    size_output = stdout.read().decode().strip()
    size_str = size_output.split()[0]
    assert 'M' in size_str or 'G' in size_str, \
        f"Backup file too small: {size_str} (expected >100M)"


@pytest.mark.whaletag
def test_data_cleanup_and_restore():
    """Test data cleanup with sudo rm and restore from backup on container restart"""
    import subprocess
    import time

    # Container name defaults to the simulator hostname.
    container_name = os.getenv('WHALETAG_HOSTNAME', 'wt-b827eb123456')

    def _list_data():
        # List /data inside the container; fail loudly if docker exec fails
        # (the old version silently treated a failed exec as an empty dir).
        result = subprocess.run(
            ['docker', 'exec', container_name, 'ls', '/data'],
            capture_output=True, text=True
        )
        assert result.returncode == 0, f"docker exec failed: {result.stderr}"
        return result.stdout.strip().split('\n')

    # Step 1: Verify data exists before cleanup
    initial_files = _list_data()
    assert len(initial_files) > 5, "Not enough files in /data before cleanup"
    assert any('.flac' in f or '.csv' in f for f in initial_files), \
        "No data files found before cleanup"

    # Step 2: Clean data using sudo rm (simulates ceti whaletag -ct)
    # Need shell expansion for glob pattern
    subprocess.run(
        ['docker', 'exec', container_name, 'sh', '-c', 'sudo rm -rf /data/*.*'],
        check=True
    )

    # Verify data is cleaned (only directories remain: logs, swap, lost+found)
    remaining_files = _list_data()
    assert not any('.flac' in f or '.csv' in f for f in remaining_files), \
        "Data files still exist after cleanup"

    # Step 3: Restart container to trigger restore
    subprocess.run(['docker', 'restart', container_name], check=True)

    # Step 4: Wait for the init script to restore data. Poll up to 60s
    # instead of a single fixed 15s sleep, so slow machines don't flake
    # and fast ones don't wait longer than needed.
    deadline = time.monotonic() + 60
    restored_files = []
    while time.monotonic() < deadline:
        try:
            restored_files = _list_data()
        except AssertionError:
            # Container may still be starting; docker exec can fail briefly.
            restored_files = []
        if any('.flac' in f for f in restored_files) and \
                any('.csv' in f for f in restored_files):
            break
        time.sleep(2)

    assert len(restored_files) > 5, "Not enough files after restore"
    assert any('.flac' in f for f in restored_files), "Audio files not restored"
    assert any('.csv' in f for f in restored_files), "CSV files not restored"

    # Verify SSH server is running after restore
    result = subprocess.run(
        ['docker', 'exec', container_name, 'pgrep', 'sshd'],
        capture_output=True, text=True
    )
    assert result.returncode == 0, "SSH server not running after restore"