From 392ce3bd2a96b3a7153984369c9fa3100b1ef8bb Mon Sep 17 00:00:00 2001 From: Sachin Sharma Date: Tue, 17 Mar 2026 14:23:04 +0530 Subject: [PATCH 1/4] updated metrics folder for din in --- dine-in/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dine-in/docker-compose.yml b/dine-in/docker-compose.yml index e85d291..1db7168 100644 --- a/dine-in/docker-compose.yml +++ b/dine-in/docker-compose.yml @@ -180,7 +180,7 @@ services: - METRICS_DIR=/tmp/results - DEVICE_ENV_PATH=/configs/device.env volumes: - - ./metrics:/tmp/results + - ./results:/tmp/results - ./configs:/configs - /tmp/.X11-unix:/tmp/.X11-unix - /sys/devices:/sys/devices From 25c79e05cc6e4e8159ca1190d313b052cd06eb2f Mon Sep 17 00:00:00 2001 From: Jitendra Date: Tue, 17 Mar 2026 17:59:26 +0530 Subject: [PATCH 2/4] updated document and resolved issues --- dine-in/Makefile | 9 ++---- dine-in/README.md | 2 +- dine-in/docs/user-guide/get-started.md | 15 ++------- take-away/.env.example | 4 +-- take-away/Dockerfile | 3 +- take-away/Makefile | 11 ++++--- take-away/config/orders.json | 1 - take-away/docker-compose.yaml | 6 ++-- .../docs/user-guide/benchmarking-guide.md | 2 +- take-away/docs/user-guide/get-started.md | 31 +++++++++++++------ .../user-guide/how-to-build-from-source.md | 16 ---------- .../docs/user-guide/system-requirements.md | 24 ++------------ take-away/src/core/ovms_client.py | 5 +-- take-away/src/ocr_worker.py | 31 +++++++++++++++++-- 14 files changed, 79 insertions(+), 81 deletions(-) diff --git a/dine-in/Makefile b/dine-in/Makefile index e95e541..85a7c78 100755 --- a/dine-in/Makefile +++ b/dine-in/Makefile @@ -17,7 +17,7 @@ CONCURRENCY_MAX ?= 10 REQUESTS_PER_LEVEL ?= 10 REQUEST_TIMEOUT ?= 30 RESULTS_DIR ?= results -METRICS_DIR ?= metrics +METRICS_DIR ?= results OOM_PROTECTION ?= 1 # Performance tools path @@ -431,10 +431,7 @@ benchmark-metrics: echo "No benchmark metrics found. 
Run 'make benchmark' first."; \ fi -benchmark-stream-density: benchmark-density - @echo "Note: benchmark-stream-density is deprecated, use 'make benchmark-density' instead" - -benchmark-density: ## Run Dine-In stream density benchmark (image-based latency) +benchmark-stream-density: ## Run Dine-In stream density benchmark (image-based latency) @if [ "$(OOM_PROTECTION)" = "0" ]; then \ echo "╔════════════════════════════════════════════════════════════╗"; \ echo "║ WARNING ║"; \ @@ -558,7 +555,7 @@ plot-metrics: ## Generate plots from benchmark metrics @echo "╔═══════════════════════════════════════════════════════════════════╗" @echo "║ Generating Metrics Plots ║" @echo "╚═══════════════════════════════════════════════════════════════════╝" - @sudo chmod -R ugo+rw $(CURDIR)/$(METRICS_DIR) 2>/dev/null || true + @chmod -R ugo+rw $(CURDIR)/$(METRICS_DIR) 2>/dev/null || true cd $(PERF_TOOLS_DIR) && \ ( \ python3 -m venv venv && \ diff --git a/dine-in/README.md b/dine-in/README.md index d4219bd..8f033fb 100644 --- a/dine-in/README.md +++ b/dine-in/README.md @@ -14,7 +14,7 @@ ### Prerequisites - Docker 24.0+ with Compose V2 -- NVIDIA GPU with 8GB+ VRAM (or Intel GPU) +- Intel GPU - 32GB+ RAM recommended - Intel Xeon or equivalent CPU diff --git a/dine-in/docs/user-guide/get-started.md b/dine-in/docs/user-guide/get-started.md index ccd1870..02b5f98 100644 --- a/dine-in/docs/user-guide/get-started.md +++ b/dine-in/docs/user-guide/get-started.md @@ -24,7 +24,7 @@ This guide walks you through the installation, configuration, and first-run of t |-----------|---------|-------------| | CPU | Intel Xeon 8 cores | Intel Xeon 16+ cores | | RAM | 16GB | 32GB+ | -| GPU | Intel Arc A770 (8GB) | Intel Arc / NVIDIA RTX 3080+ | +| GPU | Intel Arc A770 (8GB) | Intel Arc | | Storage | 50GB SSD | 200GB NVMe | | Network | 1 Gbps | 10 Gbps | @@ -34,7 +34,6 @@ This guide walks you through the installation, configuration, and first-run of t |----------|---------|---------| | Docker | 24.0+ | 
Container runtime | | Docker Compose | V2+ | Service orchestration | -| NVIDIA Driver | 535+ | GPU support (if NVIDIA) | | Intel GPU Driver | Latest | GPU support (if Intel) | | Python | 3.10+ | Local development (optional) | @@ -49,10 +48,6 @@ docker --version docker compose version # Expected: Docker Compose version v2.x.x -# GPU availability (NVIDIA) -nvidia-smi -# OR for Intel -clinfo | head -20 ``` --- @@ -447,8 +442,7 @@ docker logs dinein_ovms_vlm ls -la ../ovms-service/models/ # Check GPU availability -clinfo | head -20 # Intel -nvidia-smi # NVIDIA +clinfo | head -20 # Intel ``` ### Connection Refused to OVMS @@ -486,7 +480,7 @@ netstat -tulpn | grep -E "7861|8083|8002|8081" **Solution**: - Ensure GPU drivers are installed -- Check GPU utilization: `intel_gpu_top` or `nvidia-smi` +- Check GPU utilization: `intel_gpu_top` - Verify OVMS is using GPU in logs: `docker logs dinein_ovms_vlm | grep -i gpu` - Consider reducing image resolution in preprocessing @@ -522,9 +516,6 @@ sudo usermod -aG video $USER # Verify GPU access ls -la /dev/dri/ -# For NVIDIA -nvidia-smi -sudo systemctl restart docker ``` ### No Scenarios Available in UI diff --git a/take-away/.env.example b/take-away/.env.example index d09e4c4..a0ea44a 100644 --- a/take-away/.env.example +++ b/take-away/.env.example @@ -114,8 +114,8 @@ NO_PROXY=localhost,127.0.0.1,order-accuracy,minio,ovms-vlm,semantic-service,rtsp # Example (Pexels direct download): # SAMPLE_VIDEO_URL=https://www.pexels.com/download/video/12345678 # Example (any direct .mp4 link): -# SAMPLE_VIDEO_URL=https://example.com/path/to/video.mp4 -SAMPLE_VIDEO_URL= +# SAMPLE_VIDEO_URL=https://www.pexels.com/download/video/35120443/ +SAMPLE_VIDEO_URL=https://www.pexels.com/download/video/35120438/ # SAMPLE_VIDEO_DEST: Local destination path for the downloaded video. # Default is storage/videos/test.mp4 (used by RTSP streamer and benchmark).
diff --git a/take-away/Dockerfile b/take-away/Dockerfile index ca9845c..7e83600 100755 --- a/take-away/Dockerfile +++ b/take-away/Dockerfile @@ -31,7 +31,8 @@ ENV TZ=UTC # ------------------------------- # Install system dependencies # ------------------------------- -RUN apt-get update && \ +RUN rm -f /etc/apt/sources.list.d/intel-gpu*.list 2>/dev/null; \ + apt-get update && \ apt-get install -y --no-install-recommends \ tzdata \ git \ diff --git a/take-away/Makefile b/take-away/Makefile index 59ae042..b1db282 100644 --- a/take-away/Makefile +++ b/take-away/Makefile @@ -59,7 +59,6 @@ TAG ?= 2026.0-rc1 # Sample video for quick-start testing (orders 384 → 651 → 925) # Hosted on the upstream repo's GitHub Releases. Override via env if needed: # SAMPLE_VIDEO_URL= make download-sample-video -SAMPLE_VIDEO_URL ?= https://github.com/intel-retail/order-accuracy/releases/download/2026.0-rc1/test.mp4 SAMPLE_VIDEO_DEST ?= storage/videos/test.mp4 # Registry configuration: true=pull from registry, false=build locally @@ -76,7 +75,7 @@ BENCHMARK_DURATION ?= 200 # OCR warmup - reduced from 5 to 2 for faster startup (~10s vs ~25s) OCR_WARMUP_FRAMES ?= 2 BENCHMARK_WORKER_INCREMENT ?= 1 -BENCHMARK_MIN_TRANSACTIONS ?= 3 +BENCHMARK_MIN_TRANSACTIONS ?= 1 BENCHMARK_CONTAINER ?= oa_service # Docker compose file @@ -439,9 +438,11 @@ benchmark: setup-dirs ## Run Order Accuracy benchmark (uses benchmark_order_accu --workers $(BENCHMARK_WORKERS) \ --duration $(BENCHMARK_DURATION) \ --init_duration $(BENCHMARK_INIT_DURATION) \ + --profile benchmark \ --results_dir $(CURDIR)/$(RESULTS_DIR) \ --target_device $(OPENVINO_DEVICE) \ - --skip_perf_tools; \ + --skip_perf_tools \ + --skip_export; \ deactivate \ ) @@ -491,9 +492,11 @@ benchmark-oa: setup-dirs ## Run Order Accuracy benchmark with fixed workers --workers $(BENCHMARK_WORKERS) \ --duration $(BENCHMARK_DURATION) \ --init_duration $(BENCHMARK_INIT_DURATION) \ + --profile benchmark \ --results_dir $(CURDIR)/$(RESULTS_DIR) \ --target_device 
$(OPENVINO_DEVICE) \ - --skip_perf_tools; \ + --skip_perf_tools \ + --skip_export; \ deactivate \ ) diff --git a/take-away/config/orders.json b/take-away/config/orders.json index 3a4f52f..e890863 100755 --- a/take-away/config/orders.json +++ b/take-away/config/orders.json @@ -20,7 +20,6 @@ { "name": "yellow banana", "quantity": 1 } ], "651": [ - { "name": "water bottle", "quantity": 1 }, { "name": "banana", "quantity": 2 }, { "name": "coke 2 liter bottle", "quantity": 1 } ], diff --git a/take-away/docker-compose.yaml b/take-away/docker-compose.yaml index c79392d..3f20e4d 100755 --- a/take-away/docker-compose.yaml +++ b/take-away/docker-compose.yaml @@ -321,7 +321,8 @@ services: - no_proxy=${NO_PROXY} container_name: oa_rtsp_streamer profiles: - - parallel # Only start this service in parallel mode + - parallel + - benchmark depends_on: order-accuracy: condition: service_healthy @@ -365,7 +366,8 @@ services: image: intel/retail-benchmark:2026.0-rc1 container_name: metrics-collector profiles: - - benchmark # Only start during benchmarking, not with regular 'make up' + - benchmark + - parallel privileged: true pid: host environment: diff --git a/take-away/docs/user-guide/benchmarking-guide.md b/take-away/docs/user-guide/benchmarking-guide.md index 1507ed4..f0d679d 100644 --- a/take-away/docs/user-guide/benchmarking-guide.md +++ b/take-away/docs/user-guide/benchmarking-guide.md @@ -113,7 +113,7 @@ make up make test-api # 5. 
Check GPU availability -nvidia-smi # or clinfo for Intel +clinfo ``` ### Single Video Benchmark diff --git a/take-away/docs/user-guide/get-started.md b/take-away/docs/user-guide/get-started.md index aede8f5..7e6f557 100644 --- a/take-away/docs/user-guide/get-started.md +++ b/take-away/docs/user-guide/get-started.md @@ -24,7 +24,7 @@ This guide walks you through the installation, configuration, and first-run of t |-----------|---------|-------------| | CPU | Intel Xeon 8 cores | Intel Xeon 16+ cores | | RAM | 16GB | 32GB+ | -| GPU | Intel Arc A770 (8GB) | NVIDIA RTX 3080+ / Intel Arc | +| GPU | Intel Arc A770 (8GB) | Intel Arc | | Storage | 50GB SSD | 200GB NVMe | | Network | 1 Gbps | 10 Gbps | @@ -34,7 +34,6 @@ This guide walks you through the installation, configuration, and first-run of t |----------|---------|---------| | Docker | 24.0+ | Container runtime | | Docker Compose | V2+ | Service orchestration | -| NVIDIA Driver | 535+ | GPU support (if NVIDIA) | | Intel GPU Driver | Latest | GPU support (if Intel) | | Python | 3.10+ | Local development (optional) | @@ -49,8 +48,6 @@ docker --version docker compose version # Expected: Docker Compose version v2.x.x -# GPU availability (NVIDIA) -nvidia-smi # OR for Intel clinfo | head -20 ``` @@ -117,6 +114,26 @@ make up > **Note**: `make build` pulls pre-built images from Docker Hub by default. Use `REGISTRY=false` to build from source. +## RTSP Stream for Live Verification + +To start a standalone RTSP streamer that loops video files for real-time order verification, run: + +```bash +WORKERS=1 docker compose --profile parallel up -d --no-deps rtsp-streamer +``` + +> **Prerequisite:** Place a test video at `storage/videos/test.mp4` before starting the streamer. You can run `make download-sample-video` to get one. 
+ +Once the streamer is running, the following RTSP URL becomes available: + +| Access From | URL | +|-------------|-----| +| Host machine / Gradio UI | `rtsp://localhost:8554/station_1` | +| Other containers (internal) | `rtsp://rtsp-streamer:8554/station_1` | + +For multiple stations, increase `WORKERS` (e.g., `WORKERS=3`) to create `station_1`, `station_2`, and `station_3` streams. + + --- ## Configuration @@ -346,9 +363,6 @@ docker logs oa_ovms_vlm # Verify model path exists ls -la models/vlm/ -# Check GPU availability -nvidia-smi # or clinfo for Intel -``` ### Connection Refused to OVMS @@ -397,9 +411,6 @@ make down && make up **Solution**: ```bash -# For NVIDIA -nvidia-smi -sudo systemctl restart docker # For Intel sudo usermod -aG render $USER diff --git a/take-away/docs/user-guide/how-to-build-from-source.md b/take-away/docs/user-guide/how-to-build-from-source.md index 95c608b..645e6ab 100644 --- a/take-away/docs/user-guide/how-to-build-from-source.md +++ b/take-away/docs/user-guide/how-to-build-from-source.md @@ -38,18 +38,6 @@ sudo apt install -y docker.io docker-compose-v2 sudo usermod -aG docker $USER ``` -### GPU Support (Optional) - -```bash -# NVIDIA Container Toolkit -distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) -curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - -curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \ - sudo tee /etc/apt/sources.list.d/nvidia-docker.list -sudo apt update -sudo apt install -y nvidia-container-toolkit -sudo systemctl restart docker -``` ### Python Environment @@ -484,10 +472,6 @@ ENV PYTHONPATH="/app:${PYTHONPATH}" **Issue**: `CUDA not available` -```bash -# Solution: Use nvidia runtime -docker run --gpus all -it oa_service:dev - # Or in compose services: order-accuracy: diff --git a/take-away/docs/user-guide/system-requirements.md b/take-away/docs/user-guide/system-requirements.md index 351d1bc..a18a851 100644 --- a/take-away/docs/user-guide/system-requirements.md +++ b/take-away/docs/user-guide/system-requirements.md @@ -25,7 +25,7 @@ Suitable for development and testing with single worker mode. |-----------|---------------| | **CPU** | Intel Xeon 8 cores @ 2.4 GHz | | **RAM** | 16 GB DDR4 | -| **GPU** | Intel Arc A770 8GB / NVIDIA RTX 3060 12GB | +| **GPU** | Intel Arc A770 8GB | | **Storage** | 50 GB SSD | | **Network** | 1 Gbps Ethernet | @@ -37,7 +37,7 @@ Suitable for production with 2-4 station workers. |-----------|---------------| | **CPU** | Intel Xeon 16 cores @ 3.0 GHz | | **RAM** | 32 GB DDR4 | -| **GPU** | Intel Data Center GPU Max / NVIDIA RTX 3080 10GB | +| **GPU** | Intel Data Center GPU Max| | **Storage** | 200 GB NVMe SSD | | **Network** | 10 Gbps Ethernet | @@ -49,7 +49,7 @@ Suitable for production with 8+ station workers. |-----------|---------------| | **CPU** | Intel Xeon 32+ cores @ 3.0 GHz | | **RAM** | 64 GB DDR4/DDR5 | -| **GPU** | 2x NVIDIA RTX 4090 / Intel Data Center GPU Flex | +| **GPU** | Intel Data Center GPU Flex | | **Storage** | 500 GB NVMe SSD RAID | | **Network** | 25 Gbps Ethernet | @@ -72,18 +72,10 @@ Suitable for production with 8+ station workers. 
|----------|-----------------|-------------| | Docker Engine | 24.0.0 | 25.0+ | | Docker Compose | 2.20.0 | 2.24+ | -| NVIDIA Container Toolkit | 1.14.0 | Latest | | containerd | 1.6.0 | 1.7+ | ### GPU Drivers -#### NVIDIA -| Driver | Minimum Version | -|--------|-----------------| -| NVIDIA Driver | 535.x | -| CUDA Toolkit | 12.0 | -| cuDNN | 8.9 | - #### Intel | Driver | Minimum Version | |--------|-----------------| @@ -146,16 +138,6 @@ sudo ufw allow 9001/tcp # MinIO Console (admin only) ## GPU Support -### NVIDIA GPUs - -| GPU | VRAM | Workers Supported | Notes | -|-----|------|-------------------|-------| -| RTX 3060 | 12 GB | 1-2 | Development | -| RTX 3080 | 10 GB | 2-4 | Recommended | -| RTX 4080 | 16 GB | 4-6 | High performance | -| RTX 4090 | 24 GB | 6-8 | Best performance | -| A100 | 40/80 GB | 10+ | Data center | - ### Intel GPUs | GPU | VRAM | Workers Supported | Notes | diff --git a/take-away/src/core/ovms_client.py b/take-away/src/core/ovms_client.py index ab89dc4..040f377 100755 --- a/take-away/src/core/ovms_client.py +++ b/take-away/src/core/ovms_client.py @@ -19,8 +19,9 @@ logger = logging.getLogger(__name__) -# Directory for saving VLM input frames +# Directory for saving VLM input frames (debug only) VLM_INPUT_DIR = os.environ.get('VLM_INPUT_DIR', '/results/vlm-in') +SAVE_VLM_INPUT = os.environ.get('SAVE_VLM_INPUT', 'false').lower() in ('true', '1', 'yes') class OVMSVLMClient: @@ -181,7 +182,7 @@ def generate( } # Save input frames for debugging (before sending request) - if unique_id: + if unique_id and SAVE_VLM_INPUT: self._save_input_frames(images, unique_id) # Send request diff --git a/take-away/src/ocr_worker.py b/take-away/src/ocr_worker.py index c1ac70d..e16fb74 100644 --- a/take-away/src/ocr_worker.py +++ b/take-away/src/ocr_worker.py @@ -8,6 +8,24 @@ import numpy as np +def _load_known_orders(): + """Load valid order IDs from orders.json for OCR validation.""" + import json + import os + orders_path = os.environ.get("ORDERS_PATH", 
"/config/orders.json") + try: + with open(orders_path, "r") as f: + data = json.load(f) + known = set(str(k) for k in data.keys()) + import sys + print(f"[OCR-WORKER] Loaded {len(known)} known order IDs: {known}", file=sys.stderr, flush=True) + return known + except Exception as e: + import sys + print(f"[OCR-WORKER] WARNING: Could not load {orders_path}: {e}", file=sys.stderr, flush=True) + return set() + + def run_worker(input_queue, output_queue, models_dir): """ Entry point of the OCR worker subprocess. @@ -18,6 +36,9 @@ def run_worker(input_queue, output_queue, models_dir): """ import cv2 import easyocr + import sys + + known_orders = _load_known_orders() try: reader = easyocr.Reader( @@ -41,7 +62,6 @@ def run_worker(input_queue, output_queue, models_dir): order_id = None # DEBUG: Log all OCR results for troubleshooting - import sys if results: print(f"[OCR-DEBUG] frame={frame_id} raw_results={results}", file=sys.stderr, flush=True) @@ -49,7 +69,14 @@ def run_worker(input_queue, output_queue, models_dir): if '#' in text: digits = ''.join(c for c in text if c.isdigit()) if len(digits) >= 3: - order_id = digits[:3] + candidate = digits[:3] + # Validate against known orders to reject misreads + if known_orders and candidate not in known_orders: + print(f"[OCR-VALIDATE] frame={frame_id} rejected '{candidate}' " + f"(not in known orders {known_orders})", + file=sys.stderr, flush=True) + continue + order_id = candidate break output_queue.put((frame_id, order_id)) except Exception: From 09dfe216e061da215724223c51e6d5e61057d54b Mon Sep 17 00:00:00 2001 From: Jitendra Date: Tue, 17 Mar 2026 18:10:18 +0530 Subject: [PATCH 3/4] updated the document for dine in --- README.md | 2 +- dine-in/docs/user-guide/get-started.md | 12 ++++++------ dine-in/docs/user-guide/how-to-use-application.md | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 6425b0f..9e53e98 100755 --- a/README.md +++ b/README.md @@ -277,7 +277,7 @@ make down # 
Stop services make logs # View logs make update-submodules # Initialize performance-tools (required before benchmarking) make benchmark # Run benchmark -make benchmark-density # Run stream density test +make benchmark-stream-density # Run stream density test make benchmark-density-results # View density benchmark results ``` diff --git a/dine-in/docs/user-guide/get-started.md b/dine-in/docs/user-guide/get-started.md index 02b5f98..9f2cfd2 100644 --- a/dine-in/docs/user-guide/get-started.md +++ b/dine-in/docs/user-guide/get-started.md @@ -207,7 +207,7 @@ This starts 4 containers: To measure the maximum number of concurrent image validations the system can sustain under a latency target: ```bash -make benchmark-density +make benchmark-stream-density ``` This automatically scales concurrent requests up, measuring end-to-end latency at each level, and stops when the target latency (default 15s) is exceeded. Results are saved to `./results/`. @@ -215,7 +215,7 @@ This automatically scales concurrent requests up, measuring end-to-end latency a Override defaults via environment or CLI: ```bash -make benchmark-density \ +make benchmark-stream-density \ BENCHMARK_TARGET_LATENCY_MS=20000 \ BENCHMARK_INIT_DURATION=30 ``` @@ -378,7 +378,7 @@ make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 ### Stream Density Test ```bash -make benchmark-density +make benchmark-stream-density ``` ### Stream Density Configuration @@ -407,10 +407,10 @@ export BENCHMARK_DENSITY_INCREMENT=2 export BENCHMARK_LATENCY_METRIC=p95 # Run benchmark (uses env vars) -make benchmark-density +make benchmark-stream-density # Short aliases also work on the CLI: -make benchmark-density TARGET_LATENCY_MS=20000 DENSITY_INCREMENT=2 LATENCY_METRIC=p95 +make benchmark-stream-density TARGET_LATENCY_MS=20000 DENSITY_INCREMENT=2 LATENCY_METRIC=p95 ``` **Using CLI Arguments (override env vars):** @@ -572,7 +572,7 @@ make clean # Run benchmarks make benchmark-single IMAGE_ID=MCD-1001 # Quick single image test make 
benchmark # Full benchmark -make benchmark-density # Stream density test +make benchmark-stream-density # Stream density test # Development make shell # Shell into container diff --git a/dine-in/docs/user-guide/how-to-use-application.md b/dine-in/docs/user-guide/how-to-use-application.md index 7809ad6..fbda5e8 100644 --- a/dine-in/docs/user-guide/how-to-use-application.md +++ b/dine-in/docs/user-guide/how-to-use-application.md @@ -199,7 +199,7 @@ make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 BENCHMARK_TARGET_DEVIC Tests maximum concurrent validations within latency target. ```bash -make benchmark-density +make benchmark-stream-density ``` Output: From ab1b915528993935499257d928acfd9ea23e0058 Mon Sep 17 00:00:00 2001 From: Sachin Sharma Date: Tue, 17 Mar 2026 20:44:18 +0530 Subject: [PATCH 4/4] updated doc for device usage --- dine-in/.env.example | 2 +- dine-in/Makefile | 12 ++- dine-in/docker-compose.yml | 6 +- dine-in/docs/user-guide/get-started.md | 10 ++- .../docs/user-guide/how-to-use-application.md | 10 ++- ovms-service/setup_models.sh | 80 ++++++++++++++++--- take-away/.env.example | 3 +- take-away/Makefile | 15 ++-- take-away/docker-compose.yaml | 8 +- .../docs/user-guide/benchmarking-guide.md | 8 +- take-away/docs/user-guide/get-started.md | 10 ++- 11 files changed, 128 insertions(+), 36 deletions(-) diff --git a/dine-in/.env.example b/dine-in/.env.example index 4c4f6ee..4b0e108 100755 --- a/dine-in/.env.example +++ b/dine-in/.env.example @@ -19,7 +19,7 @@ LOG_LEVEL=INFO OVMS_ENDPOINT=http://ovms-vlm:8000 OVMS_MODEL_NAME=Qwen/Qwen2.5-VL-7B-Instruct VLM_PRECISION=int8 -VLM_DEVICE=GPU +TARGET_DEVICE=GPU SEMANTIC_SERVICE_ENDPOINT=http://semantic-service:8080 API_TIMEOUT=60 diff --git a/dine-in/Makefile b/dine-in/Makefile index 85a7c78..bbe8894 100755 --- a/dine-in/Makefile +++ b/dine-in/Makefile @@ -5,6 +5,12 @@ # Dine-In Order Accuracy - Makefile # ============================================================================= +# 
============================================================================= +# Load Environment Variables from .env file +# ============================================================================= +-include .env +export + # Worker configuration for stream density testing WORKERS ?= 1 ITERATIONS ?= 0 @@ -80,7 +86,7 @@ BENCHMARK_WORKERS ?= 1 BENCHMARK_ITERATIONS ?= 1 BENCHMARK_DURATION ?= 180 BENCHMARK_TARGET_FPS ?= 15.0 -BENCHMARK_TARGET_DEVICE ?= GPU +TARGET_DEVICE ?= GPU # Colors for output RED := \033[0;31m @@ -393,7 +399,7 @@ benchmark: @echo "╚═══════════════════════════════════════════════════════════════════╝" @echo "Workers: $(BENCHMARK_WORKERS)" @echo "Iterations: $(BENCHMARK_ITERATIONS)" - @echo "Target Device: $(BENCHMARK_TARGET_DEVICE)" + @echo "Target Device: $(TARGET_DEVICE)" @echo "Results Dir: $(RESULTS_DIR)" @echo "" mkdir -p $(RESULTS_DIR) @@ -410,7 +416,7 @@ benchmark: --init_duration $(BENCHMARK_INIT_DURATION) \ --duration $(BENCHMARK_DURATION) \ --results_dir $(CURDIR)/$(RESULTS_DIR) \ - --target_device $(BENCHMARK_TARGET_DEVICE) \ + --target_device $(TARGET_DEVICE) \ --skip_perf_tools; \ deactivate \ ) diff --git a/dine-in/docker-compose.yml b/dine-in/docker-compose.yml index 1db7168..910757f 100644 --- a/dine-in/docker-compose.yml +++ b/dine-in/docker-compose.yml @@ -49,7 +49,7 @@ services: - OVMS_ENDPOINT=${OVMS_ENDPOINT} - OVMS_MODEL_NAME=${OVMS_MODEL_NAME} - VLM_PRECISION=${VLM_PRECISION} - - VLM_DEVICE=${VLM_DEVICE} + - VLM_DEVICE=${TARGET_DEVICE:-GPU} - CACHE_ENABLED=true - CACHE_BACKEND=memory - PROMETHEUS_ENABLED=true @@ -97,7 +97,7 @@ services: - OVMS_ENDPOINT=${OVMS_ENDPOINT} - OVMS_MODEL_NAME=${OVMS_MODEL_NAME} - VLM_PRECISION=${VLM_PRECISION} - - VLM_DEVICE=${VLM_DEVICE} + - VLM_DEVICE=${TARGET_DEVICE:-GPU} - METRICS_COLLECTOR_ENDPOINT=http://metrics-collector:8084 - CONTAINER_RESULTS_PATH=/app/results - USECASE_1=dine-in-order-accuracy @@ -147,7 +147,7 @@ services: - OVMS_ENDPOINT=${OVMS_ENDPOINT} - 
OVMS_MODEL_NAME=${OVMS_MODEL_NAME} - VLM_PRECISION=${VLM_PRECISION} - - VLM_DEVICE=${VLM_DEVICE} + - VLM_DEVICE=${TARGET_DEVICE:-GPU} - CONTAINER_RESULTS_PATH=/app/results - USECASE_1=dine-in-order-accuracy - NO_PROXY=localhost,127.0.0.1,ovms-vlm,semantic-service,host.docker.internal diff --git a/dine-in/docs/user-guide/get-started.md b/dine-in/docs/user-guide/get-started.md index 9f2cfd2..b5e263b 100644 --- a/dine-in/docs/user-guide/get-started.md +++ b/dine-in/docs/user-guide/get-started.md @@ -2,6 +2,12 @@ This guide walks you through the installation, configuration, and first-run of the Dine-In Order Accuracy system for image-based plate validation. +> **Note — `TARGET_DEVICE`**: To change the inference device mode, set `TARGET_DEVICE` in your `.env` file to `GPU`, `CPU`, or `AUTO`. After changing the device, re-run the setup script to update the model config: +> ```bash +> cd ../ovms-service && ./setup_models.sh --app dine-in +> ``` +> You can also pass the device explicitly: `./setup_models.sh --device CPU` + --- ## Table of Contents @@ -366,13 +372,13 @@ Configuration options: |----------|---------|-------------| | `BENCHMARK_WORKERS` | 1 | Number of concurrent workers | | `BENCHMARK_DURATION` | 180 | Benchmark duration (seconds) | -| `BENCHMARK_TARGET_DEVICE` | GPU | Target device: CPU, GPU, NPU | +| `TARGET_DEVICE` | GPU | Target device: CPU, GPU, NPU | | `RESULTS_DIR` | results | Output directory | Example with custom settings: ```bash -make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 +make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 TARGET_DEVICE=GPU ``` ### Stream Density Test diff --git a/dine-in/docs/user-guide/how-to-use-application.md b/dine-in/docs/user-guide/how-to-use-application.md index fbda5e8..99fe1f4 100644 --- a/dine-in/docs/user-guide/how-to-use-application.md +++ b/dine-in/docs/user-guide/how-to-use-application.md @@ -2,6 +2,12 @@ Guide to using the Dine-In Order Accuracy application features. 
+> **Note — `TARGET_DEVICE`**: To change the inference device mode, set `TARGET_DEVICE` in your `.env` file to `GPU`, `CPU`, or `AUTO`. After changing the device, re-run the setup script to update the model config: +> ```bash +> cd ../ovms-service && ./setup_models.sh --app dine-in +> ``` +> You can also pass the device explicitly: `./setup_models.sh --device CPU` + ## Gradio UI Access the web interface at http://localhost:7861 @@ -185,13 +191,13 @@ Configuration options: | `BENCHMARK_INIT_DURATION` | 60 | Warmup time (seconds) | | `BENCHMARK_MIN_REQUESTS` | 3 | Min requests before measuring | | `BENCHMARK_REQUEST_TIMEOUT` | 300 | Request timeout (seconds) | -| `BENCHMARK_TARGET_DEVICE` | GPU | Target device: CPU, GPU, NPU | +| `TARGET_DEVICE` | GPU | Target device: CPU, GPU, NPU | | `RESULTS_DIR` | results | Output directory | | `REGISTRY` | false | Use registry images (true/false) | Example: ```bash -make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 BENCHMARK_TARGET_DEVICE=GPU +make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 TARGET_DEVICE=GPU ``` ### Stream Density Test diff --git a/ovms-service/setup_models.sh b/ovms-service/setup_models.sh index 65c26f9..b2fcd97 100755 --- a/ovms-service/setup_models.sh +++ b/ovms-service/setup_models.sh @@ -6,6 +6,32 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$(dirname "${SCRIPT_DIR}")" MODELS_DIR="${SCRIPT_DIR}/models" +############################################### +# TARGET DEVICE CONFIGURATION +# Override via: TARGET_DEVICE=CPU ./setup_models.sh +# or: ./setup_models.sh --device CPU +############################################### +# Parse CLI flags: --device and --app +for arg in "$@"; do + case "$arg" in + --device=*) TARGET_DEVICE="${arg#*=}"; TARGET_DEVICE_FROM_CLI=true ;; + --device) _shift_device=true ;; + --app=*) SETUP_APP="${arg#*=}" ;; + --app) _shift_app=true ;; + *) + if [ "${_shift_device:-}" = true ]; then + TARGET_DEVICE="$arg"; TARGET_DEVICE_FROM_CLI=true; 
_shift_device=false + elif [ "${_shift_app:-}" = true ]; then + SETUP_APP="$arg"; _shift_app=false + fi + ;; + esac +done +# If set via environment variable (not CLI), mark it so .env doesn't override +if [ -n "${TARGET_DEVICE:-}" ] && [ -z "${TARGET_DEVICE_FROM_CLI:-}" ]; then + TARGET_DEVICE_FROM_CLI=true +fi + ############################################### # HARD CODED MODEL REGISTRY ############################################### @@ -19,15 +45,49 @@ POTENTIAL_SOURCE_DIRS=( ) ############################################### -# LOAD OVMS_MODEL_NAME FROM take-away/.env +# LOAD OVMS_MODEL_NAME AND TARGET_DEVICE FROM .env ############################################### -ENV_FILE="${PROJECT_ROOT}/take-away/.env" -if [ -f "${ENV_FILE}" ]; then - OVMS_MODEL_NAME_ENV=$(grep -E '^OVMS_MODEL_NAME=' "${ENV_FILE}" | head -1 | cut -d'=' -f2- | tr -d '"\r') +# Determine which .env file(s) to read based on --app flag +if [ "${SETUP_APP:-}" = "take-away" ]; then + _env_files=("${PROJECT_ROOT}/take-away/.env") +elif [ "${SETUP_APP:-}" = "dine-in" ]; then + _env_files=("${PROJECT_ROOT}/dine-in/.env") +else + # No --app specified: check both (take-away first, then dine-in) + _env_files=("${PROJECT_ROOT}/take-away/.env" "${PROJECT_ROOT}/dine-in/.env") fi + +TARGET_DEVICE_SOURCE="" +for _env_file in "${_env_files[@]}"; do + if [ -f "${_env_file}" ]; then + _model=$(grep -E '^OVMS_MODEL_NAME=' "${_env_file}" 2>/dev/null | head -1 | cut -d'=' -f2- | tr -d '"\r') + [ -n "${_model}" ] && OVMS_MODEL_NAME_ENV="${_model}" + # Read TARGET_DEVICE from .env if not already set via CLI/env + if [ -z "${TARGET_DEVICE_FROM_CLI:-}" ]; then + _device=$(grep -E '^TARGET_DEVICE=' "${_env_file}" 2>/dev/null | head -1 | cut -d'=' -f2- | tr -d '"\r') + if [ -n "${_device}" ]; then + TARGET_DEVICE="${_device}" + TARGET_DEVICE_SOURCE="${_env_file}" + fi + fi + fi +done + +TARGET_DEVICE="${TARGET_DEVICE:-GPU}" + # Fall back to the hard-coded source model if .env is missing or unset 
OVMS_MODEL_NAME_ENV="${OVMS_MODEL_NAME_ENV:-Qwen/Qwen2.5-VL-7B-Instruct}" +# Print source of TARGET_DEVICE so user knows exactly where it came from +if [ "${TARGET_DEVICE_FROM_CLI:-}" = true ]; then + echo "Target device: ${TARGET_DEVICE} (from CLI / environment variable)" +elif [ -n "${TARGET_DEVICE_SOURCE}" ]; then + echo "Target device: ${TARGET_DEVICE} (from ${TARGET_DEVICE_SOURCE})" +else + echo "Target device: ${TARGET_DEVICE} (default — no .env found)" +fi +echo "" + ############################################### echo "==========================================" echo "OVMS Model Setup for Order Accuracy" @@ -135,7 +195,7 @@ export_model() { --source_model "${SOURCE_MODEL}" \ --weight-format int8 \ --pipeline_type VLM_CB \ - --target_device GPU \ + --target_device "${TARGET_DEVICE}" \ --cache_size 32 \ --max_num_seqs 1 \ --enable_prefix_caching \ @@ -241,7 +301,9 @@ apply_graph_config() { local GRAPH_FILE="${MODELS_DIR}/Qwen/${MODEL_NAME}/graph.pbtxt" if [ ! -f "${GRAPH_OPTIONS_FILE}" ]; then - echo " No graph_options.json found, keeping existing graph.pbtxt" + echo " No graph_options.json found, updating device to ${TARGET_DEVICE} in graph.pbtxt" + sed -i "s/device: \"[A-Z]*\"/device: \"${TARGET_DEVICE}\"/g" "${GRAPH_FILE}" + echo " ✓ graph.pbtxt device set to ${TARGET_DEVICE}" return 0 fi @@ -250,8 +312,8 @@ apply_graph_config() { echo "Applying graph_options.json to graph.pbtxt" echo "------------------------------------------" - python3 - "${GRAPH_OPTIONS_FILE}" "${GRAPH_FILE}" << 'PYEOF' -import json, sys + TARGET_DEVICE="${TARGET_DEVICE}" python3 - "${GRAPH_OPTIONS_FILE}" "${GRAPH_FILE}" << 'PYEOF' +import json, sys, os graph_options_file = sys.argv[1] graph_file = sys.argv[2] @@ -269,7 +331,7 @@ dynamic_split = 'true' if opts.get('dynamic_split_fuse', False) else 'false' max_num_seqs = opts.get('max_num_seqs', 4) cache_size = opts.get('cache_size', 10) max_num_batched_tokens = opts.get('max_num_batched_tokens', 4096) -device = opts.get('device', 
'GPU') +device = opts.get('device', os.environ.get('TARGET_DEVICE', 'GPU')) graph = f'''input_stream: "HTTP_REQUEST_PAYLOAD:input" output_stream: "HTTP_RESPONSE_PAYLOAD:output" diff --git a/take-away/.env.example b/take-away/.env.example index a0ea44a..f510cf1 100644 --- a/take-away/.env.example +++ b/take-away/.env.example @@ -30,11 +30,10 @@ VLM_BACKEND=ovms OVMS_ENDPOINT=http://ovms-vlm:8000 OVMS_MODEL_NAME=Qwen/Qwen2.5-VL-7B-Instruct VLM_PRECISION=int8 -VLM_DEVICE=GPU +TARGET_DEVICE=GPU OVMS_TIMEOUT=120 # OpenVINO local settings (when VLM_BACKEND=openvino) -OPENVINO_DEVICE=GPU VLM_MODEL_PATH=/model/Qwen2.5-VL-7B-Instruct # ----------------------------------------------------------------------------- diff --git a/take-away/Makefile b/take-away/Makefile index b1db282..3c79105 100644 --- a/take-away/Makefile +++ b/take-away/Makefile @@ -32,7 +32,8 @@ SCALING_MODE ?= fixed VLM_BACKEND ?= ovms OVMS_ENDPOINT ?= http://ovms-vlm:8000 OVMS_MODEL_NAME ?= Qwen/Qwen2.5-VL-7B-Instruct -OPENVINO_DEVICE ?= GPU +OPENVINO_DEVICE ?= $(TARGET_DEVICE) +TARGET_DEVICE ?= GPU # Semantic Service SEMANTIC_VLM_BACKEND ?= ovms @@ -131,7 +132,7 @@ download-sample-video: setup-dirs # Required variables for specific modes REQUIRED_OVMS_VARS := OVMS_ENDPOINT OVMS_MODEL_NAME -REQUIRED_OPENVINO_VARS := OPENVINO_DEVICE VLM_MODEL_PATH +REQUIRED_OPENVINO_VARS := TARGET_DEVICE VLM_MODEL_PATH check-env: @echo "$(BLUE)Checking environment configuration...$(NC)" @@ -230,7 +231,7 @@ show-config: echo " OVMS_MODEL_NAME = $(OVMS_MODEL_NAME)"; \ else \ echo " VLM_MODEL_PATH = $(VLM_MODEL_PATH)"; \ - echo " OPENVINO_DEVICE = $(OPENVINO_DEVICE)"; \ + echo " TARGET_DEVICE = $(TARGET_DEVICE)"; \ fi @echo "" @echo "$(YELLOW)Semantic Service:$(NC)" @@ -426,7 +427,7 @@ benchmark: setup-dirs ## Run Order Accuracy benchmark (uses benchmark_order_accu @echo "Workers: $(BENCHMARK_WORKERS)" @echo "Duration: $(BENCHMARK_DURATION)s" @echo "Init Duration: $(BENCHMARK_INIT_DURATION)s" - @echo "Target Device: 
$(OPENVINO_DEVICE)" + @echo "Target Device: $(TARGET_DEVICE)" @echo "" cd $(PERF_TOOLS_DIR) && \ ( \ @@ -440,7 +441,7 @@ benchmark: setup-dirs ## Run Order Accuracy benchmark (uses benchmark_order_accu --init_duration $(BENCHMARK_INIT_DURATION) \ --profile benchmark \ --results_dir $(CURDIR)/$(RESULTS_DIR) \ - --target_device $(OPENVINO_DEVICE) \ + --target_device $(TARGET_DEVICE) \ --skip_perf_tools \ --skip_export; \ deactivate \ @@ -480,7 +481,7 @@ benchmark-oa: setup-dirs ## Run Order Accuracy benchmark with fixed workers @echo "Workers: $(BENCHMARK_WORKERS)" @echo "Duration: $(BENCHMARK_DURATION)s" @echo "Init Duration: $(BENCHMARK_INIT_DURATION)s" - @echo "Target Device: $(OPENVINO_DEVICE)" + @echo "Target Device: $(TARGET_DEVICE)" @echo "" cd $(PERF_TOOLS_DIR) && \ ( \ @@ -494,7 +495,7 @@ benchmark-oa: setup-dirs ## Run Order Accuracy benchmark with fixed workers --init_duration $(BENCHMARK_INIT_DURATION) \ --profile benchmark \ --results_dir $(CURDIR)/$(RESULTS_DIR) \ - --target_device $(OPENVINO_DEVICE) \ + --target_device $(TARGET_DEVICE) \ --skip_perf_tools \ --skip_export; \ deactivate \ diff --git a/take-away/docker-compose.yaml b/take-away/docker-compose.yaml index 3f20e4d..c9a7968 100755 --- a/take-away/docker-compose.yaml +++ b/take-away/docker-compose.yaml @@ -103,13 +103,13 @@ services: # VLM Backend (embedded or ovms) VLM_BACKEND: ${VLM_BACKEND:-ovms} VLM_MODEL_PATH: ${VLM_MODEL_PATH:-/model/Qwen2.5-VL-7B-Instruct} - OPENVINO_DEVICE: ${OPENVINO_DEVICE:-GPU} + OPENVINO_DEVICE: ${TARGET_DEVICE:-GPU} # OVMS settings (when VLM_BACKEND=ovms) OVMS_ENDPOINT: ${OVMS_ENDPOINT:-http://ovms-vlm:8000} OVMS_MODEL_NAME: ${OVMS_MODEL_NAME:-Qwen/Qwen2.5-VL-7B-Instruct} VLM_PRECISION: ${VLM_PRECISION:-int8} - VLM_DEVICE: ${VLM_DEVICE:-GPU} + VLM_DEVICE: ${TARGET_DEVICE:-GPU} OVMS_TIMEOUT: ${OVMS_TIMEOUT:-120} # Semantic service @@ -281,11 +281,11 @@ services: - OVMS_ENDPOINT=http://ovms-vlm:8000 - OVMS_MODEL_NAME=${OVMS_MODEL_NAME:-Qwen/Qwen2.5-VL-7B-Instruct} - 
VLM_PRECISION=${VLM_PRECISION:-int8} - - VLM_DEVICE=${VLM_DEVICE:-GPU} + - VLM_DEVICE=${TARGET_DEVICE:-GPU} - OVMS_TIMEOUT=${OVMS_TIMEOUT:-60} # OpenVINO settings (when VLM_BACKEND=openvino) - OPENVINO_MODEL_PATH=${OPENVINO_MODEL_PATH:-/models/Qwen2.5-VL-7B-Instruct} - - OPENVINO_DEVICE=${OPENVINO_DEVICE:-GPU} + - OPENVINO_DEVICE=${TARGET_DEVICE:-GPU} # Cache settings - CACHE_ENABLED=true - CACHE_BACKEND=memory diff --git a/take-away/docs/user-guide/benchmarking-guide.md b/take-away/docs/user-guide/benchmarking-guide.md index f0d679d..6c05dc2 100644 --- a/take-away/docs/user-guide/benchmarking-guide.md +++ b/take-away/docs/user-guide/benchmarking-guide.md @@ -2,6 +2,12 @@ This guide covers performance testing, stream density benchmarking, and metrics collection for the Take-Away Order Accuracy system. +> **Note — `TARGET_DEVICE`**: To change the inference device mode, set `TARGET_DEVICE` in your `.env` file to `GPU`, `CPU`, or `AUTO`. After changing the device, re-run the setup script to update the model config: +> ```bash +> cd ../ovms-service && ./setup_models.sh --app take-away +> ``` +> You can also pass the device explicitly: `./setup_models.sh --device CPU` + --- ## Table of Contents @@ -562,7 +568,7 @@ VLM_BATCH_SIZE=4 VLM_BATCH_TIMEOUT_MS=100 # For memory-constrained systems -OPENVINO_DEVICE=CPU +TARGET_DEVICE=CPU VLM_MAX_CONCURRENT=2 ``` diff --git a/take-away/docs/user-guide/get-started.md b/take-away/docs/user-guide/get-started.md index 7e6f557..6b603ea 100644 --- a/take-away/docs/user-guide/get-started.md +++ b/take-away/docs/user-guide/get-started.md @@ -2,6 +2,12 @@ This guide walks you through the installation, configuration, and first-run of the Take-Away Order Accuracy system. +> **Note — `TARGET_DEVICE`**: To change the inference device mode, set `TARGET_DEVICE` in your `.env` file to `GPU`, `CPU`, or `AUTO`. 
After changing the device, re-run the setup script to update the model config: +> ```bash +> cd ../ovms-service && ./setup_models.sh --app take-away +> ``` +> You can also pass the device explicitly: `./setup_models.sh --device CPU` + --- ## Table of Contents @@ -147,7 +153,7 @@ For multiple stations, increase `WORKERS` (e.g., `WORKERS=3`) to create `station VLM_BACKEND=ovms OVMS_ENDPOINT=http://ovms-vlm:8000 OVMS_MODEL_NAME=Qwen/Qwen2.5-VL-7B-Instruct -OPENVINO_DEVICE=GPU # 'GPU', 'CPU', or 'AUTO' +TARGET_DEVICE=GPU # 'GPU', 'CPU', or 'AUTO' # ============================================================================= # Semantic Service @@ -399,7 +405,7 @@ make up export VLM_BATCH_SIZE=1 # Use CPU instead of GPU (slower but less memory) -export OPENVINO_DEVICE=CPU +export TARGET_DEVICE=CPU # Restart services make down && make up