From 392ce3bd2a96b3a7153984369c9fa3100b1ef8bb Mon Sep 17 00:00:00 2001
From: Sachin Sharma <sachin.kumar.sharma@intel.com>
Date: Tue, 17 Mar 2026 14:23:04 +0530
Subject: [PATCH 1/4] updated metrics folder for dine-in

---
 dine-in/docker-compose.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dine-in/docker-compose.yml b/dine-in/docker-compose.yml
index e85d291..1db7168 100644
--- a/dine-in/docker-compose.yml
+++ b/dine-in/docker-compose.yml
@@ -180,7 +180,7 @@ services:
         - METRICS_DIR=/tmp/results
         - DEVICE_ENV_PATH=/configs/device.env
       volumes:
-        - ./metrics:/tmp/results
+        - ./results:/tmp/results
         - ./configs:/configs
         - /tmp/.X11-unix:/tmp/.X11-unix
         - /sys/devices:/sys/devices

From 25c79e05cc6e4e8159ca1190d313b052cd06eb2f Mon Sep 17 00:00:00 2001
From: Jitendra <jitendra.kumar.saini@intel.com>
Date: Tue, 17 Mar 2026 17:59:26 +0530
Subject: [PATCH 2/4] updated document and resolved issues

---
 dine-in/Makefile                              |  9 ++----
 dine-in/README.md                             |  2 +-
 dine-in/docs/user-guide/get-started.md        | 15 ++-------
 take-away/.env.example                        |  4 +--
 take-away/Dockerfile                          |  3 +-
 take-away/Makefile                            | 11 ++++---
 take-away/config/orders.json                  |  1 -
 take-away/docker-compose.yaml                 |  6 ++--
 .../docs/user-guide/benchmarking-guide.md     |  2 +-
 take-away/docs/user-guide/get-started.md      | 31 +++++++++++++------
 .../user-guide/how-to-build-from-source.md    | 16 ----------
 .../docs/user-guide/system-requirements.md    | 24 ++------------
 take-away/src/core/ovms_client.py             |  5 +--
 take-away/src/ocr_worker.py                   | 31 +++++++++++++++++--
 14 files changed, 79 insertions(+), 81 deletions(-)

diff --git a/dine-in/Makefile b/dine-in/Makefile
index e95e541..85a7c78 100755
--- a/dine-in/Makefile
+++ b/dine-in/Makefile
@@ -17,7 +17,7 @@ CONCURRENCY_MAX ?= 10
 REQUESTS_PER_LEVEL ?= 10
 REQUEST_TIMEOUT ?= 30
 RESULTS_DIR ?= results
-METRICS_DIR ?= metrics
+METRICS_DIR ?= results
 OOM_PROTECTION ?= 1
 
 # Performance tools path
@@ -431,10 +431,7 @@ benchmark-metrics:
 		echo "No benchmark metrics found. Run 'make benchmark' first."; \
 	fi
 
-benchmark-stream-density: benchmark-density
-	@echo "Note: benchmark-stream-density is deprecated, use 'make benchmark-density' instead"
-
-benchmark-density: ## Run Dine-In stream density benchmark (image-based latency)
+benchmark-stream-density: ## Run Dine-In stream density benchmark (image-based latency)
 	@if [ "$(OOM_PROTECTION)" = "0" ]; then \
 		echo "╔════════════════════════════════════════════════════════════╗"; \
 		echo "║ WARNING                                                    ║"; \
@@ -558,7 +555,7 @@ plot-metrics: ## Generate plots from benchmark metrics
 	@echo "╔═══════════════════════════════════════════════════════════════════╗"
 	@echo "║                   Generating Metrics Plots                        ║"
 	@echo "╚═══════════════════════════════════════════════════════════════════╝"
-	@sudo chmod -R ugo+rw $(CURDIR)/$(METRICS_DIR) 2>/dev/null || true
+	@chmod -R ugo+rw $(CURDIR)/$(METRICS_DIR) 2>/dev/null || true
 	cd $(PERF_TOOLS_DIR) && \
 	( \
 		python3 -m venv venv && \
diff --git a/dine-in/README.md b/dine-in/README.md
index d4219bd..8f033fb 100644
--- a/dine-in/README.md
+++ b/dine-in/README.md
@@ -14,7 +14,7 @@
 ### Prerequisites
 
 - Docker 24.0+ with Compose V2
-- NVIDIA GPU with 8GB+ VRAM (or Intel GPU)
+- Intel GPU
 - 32GB+ RAM recommended
 - Intel Xeon or equivalent CPU
 
diff --git a/dine-in/docs/user-guide/get-started.md b/dine-in/docs/user-guide/get-started.md
index ccd1870..02b5f98 100644
--- a/dine-in/docs/user-guide/get-started.md
+++ b/dine-in/docs/user-guide/get-started.md
@@ -24,7 +24,7 @@ This guide walks you through the installation, configuration, and first-run of t
 |-----------|---------|-------------|
 | CPU | Intel Xeon 8 cores | Intel Xeon 16+ cores |
 | RAM | 16GB | 32GB+ |
-| GPU | Intel Arc A770 (8GB) | Intel Arc / NVIDIA RTX 3080+ |
+| GPU | Intel Arc A770 (8GB) | Intel Arc |
 | Storage | 50GB SSD | 200GB NVMe |
 | Network | 1 Gbps | 10 Gbps |
 
@@ -34,7 +34,6 @@ This guide walks you through the installation, configuration, and first-run of t
 |----------|---------|---------|
 | Docker | 24.0+ | Container runtime |
 | Docker Compose | V2+ | Service orchestration |
-| NVIDIA Driver | 535+ | GPU support (if NVIDIA) |
 | Intel GPU Driver | Latest | GPU support (if Intel) |
 | Python | 3.10+ | Local development (optional) |
 
@@ -49,10 +48,6 @@ docker --version
 docker compose version
 # Expected: Docker Compose version v2.x.x
 
-# GPU availability (NVIDIA)
-nvidia-smi
-# OR for Intel
-clinfo | head -20
 ```
 
 ---
@@ -447,8 +442,7 @@ docker logs dinein_ovms_vlm
 ls -la ../ovms-service/models/
 
 # Check GPU availability
-clinfo | head -20  # Intel
-nvidia-smi         # NVIDIA
+clinfo | head -20  # Intel
 ```
 
 ### Connection Refused to OVMS
@@ -486,7 +480,7 @@ netstat -tulpn | grep -E "7861|8083|8002|8081"
 
 **Solution**:
 - Ensure GPU drivers are installed
-- Check GPU utilization: `intel_gpu_top` or `nvidia-smi`
+- Check GPU utilization: `intel_gpu_top`
 - Verify OVMS is using GPU in logs: `docker logs dinein_ovms_vlm | grep -i gpu`
 - Consider reducing image resolution in preprocessing
 
@@ -522,9 +516,6 @@ sudo usermod -aG video $USER
 # Verify GPU access
 ls -la /dev/dri/
 
-# For NVIDIA
-nvidia-smi
-sudo systemctl restart docker
 ```
 
 ### No Scenarios Available in UI
diff --git a/take-away/.env.example b/take-away/.env.example
index d09e4c4..a0ea44a 100644
--- a/take-away/.env.example
+++ b/take-away/.env.example
@@ -114,8 +114,8 @@ NO_PROXY=localhost,127.0.0.1,order-accuracy,minio,ovms-vlm,semantic-service,rtsp
 # Example (Pexels direct download):
 #   SAMPLE_VIDEO_URL=https://www.pexels.com/download/video/12345678
 # Example (any direct .mp4 link):
-#   SAMPLE_VIDEO_URL=https://example.com/path/to/video.mp4
-SAMPLE_VIDEO_URL=
+#   SAMPLE_VIDEO_URL=https://www.pexels.com/download/video/35120443/
+SAMPLE_VIDEO_URL=https://www.pexels.com/download/video/35120438/
 
 # SAMPLE_VIDEO_DEST: Local destination path for the downloaded video.
 # Default is storage/videos/test.mp4 (used by RTSP streamer and benchmark).
diff --git a/take-away/Dockerfile b/take-away/Dockerfile
index ca9845c..7e83600 100755
--- a/take-away/Dockerfile
+++ b/take-away/Dockerfile
@@ -31,7 +31,8 @@ ENV TZ=UTC
 # -------------------------------
 # Install system dependencies
 # -------------------------------
-RUN apt-get update && \
+RUN rm -f /etc/apt/sources.list.d/intel-gpu*.list 2>/dev/null; \
+    apt-get update && \
     apt-get install -y --no-install-recommends \
         tzdata \
         git \
diff --git a/take-away/Makefile b/take-away/Makefile
index 59ae042..b1db282 100644
--- a/take-away/Makefile
+++ b/take-away/Makefile
@@ -59,7 +59,6 @@ TAG ?= 2026.0-rc1
 # Sample video for quick-start testing (orders 384 → 651 → 925)
 # Hosted on the upstream repo's GitHub Releases. Override via env if needed:
 #   SAMPLE_VIDEO_URL=<url> make download-sample-video
-SAMPLE_VIDEO_URL ?= https://github.com/intel-retail/order-accuracy/releases/download/2026.0-rc1/test.mp4
 SAMPLE_VIDEO_DEST ?= storage/videos/test.mp4
 
 # Registry configuration: true=pull from registry, false=build locally
@@ -76,7 +75,7 @@ BENCHMARK_DURATION ?= 200
 # OCR warmup - reduced from 5 to 2 for faster startup (~10s vs ~25s)
 OCR_WARMUP_FRAMES ?= 2
 BENCHMARK_WORKER_INCREMENT ?= 1
-BENCHMARK_MIN_TRANSACTIONS ?= 3
+BENCHMARK_MIN_TRANSACTIONS ?= 1
 BENCHMARK_CONTAINER ?= oa_service
 
 # Docker compose file
@@ -439,9 +438,11 @@ benchmark: setup-dirs ## Run Order Accuracy benchmark (uses benchmark_order_accu
 			--workers $(BENCHMARK_WORKERS) \
 			--duration $(BENCHMARK_DURATION) \
 			--init_duration $(BENCHMARK_INIT_DURATION) \
+			--profile benchmark \
 			--results_dir $(CURDIR)/$(RESULTS_DIR) \
 			--target_device $(OPENVINO_DEVICE) \
-			--skip_perf_tools; \
+			--skip_perf_tools \
+			--skip_export; \
 		deactivate \
 	)
 
@@ -491,9 +492,11 @@ benchmark-oa: setup-dirs ## Run Order Accuracy benchmark with fixed workers
 			--workers $(BENCHMARK_WORKERS) \
 			--duration $(BENCHMARK_DURATION) \
 			--init_duration $(BENCHMARK_INIT_DURATION) \
+			--profile benchmark \
 			--results_dir $(CURDIR)/$(RESULTS_DIR) \
 			--target_device $(OPENVINO_DEVICE) \
-			--skip_perf_tools; \
+			--skip_perf_tools \
+			--skip_export; \
 		deactivate \
 	)
 
diff --git a/take-away/config/orders.json b/take-away/config/orders.json
index 3a4f52f..e890863 100755
--- a/take-away/config/orders.json
+++ b/take-away/config/orders.json
@@ -20,7 +20,6 @@
     { "name": "yellow banana", "quantity": 1 }
   ],
   "651": [
-    { "name": "water bottle", "quantity": 1 },
     { "name": "banana", "quantity": 2 },
     { "name": "coke 2 liter bottle", "quantity": 1 }
   ],
diff --git a/take-away/docker-compose.yaml b/take-away/docker-compose.yaml
index c79392d..3f20e4d 100755
--- a/take-away/docker-compose.yaml
+++ b/take-away/docker-compose.yaml
@@ -321,7 +321,8 @@ services:
         - no_proxy=${NO_PROXY}
     container_name: oa_rtsp_streamer
     profiles:
-      - parallel  # Only start this service in parallel mode
+      - parallel
+      - benchmark
     depends_on:
       order-accuracy:
         condition: service_healthy
@@ -365,7 +366,8 @@ services:
     image: intel/retail-benchmark:2026.0-rc1
     container_name: metrics-collector
     profiles:
-      - benchmark  # Only start during benchmarking, not with regular 'make up'
+      - benchmark
+      - parallel
     privileged: true
     pid: host
     environment:
diff --git a/take-away/docs/user-guide/benchmarking-guide.md b/take-away/docs/user-guide/benchmarking-guide.md
index 1507ed4..f0d679d 100644
--- a/take-away/docs/user-guide/benchmarking-guide.md
+++ b/take-away/docs/user-guide/benchmarking-guide.md
@@ -113,7 +113,7 @@ make up
 make test-api
 
 # 5. Check GPU availability
-nvidia-smi  # or clinfo for Intel
+clinfo
 ```
 
 ### Single Video Benchmark
diff --git a/take-away/docs/user-guide/get-started.md b/take-away/docs/user-guide/get-started.md
index aede8f5..7e6f557 100644
--- a/take-away/docs/user-guide/get-started.md
+++ b/take-away/docs/user-guide/get-started.md
@@ -24,7 +24,7 @@ This guide walks you through the installation, configuration, and first-run of t
 |-----------|---------|-------------|
 | CPU | Intel Xeon 8 cores | Intel Xeon 16+ cores |
 | RAM | 16GB | 32GB+ |
-| GPU | Intel Arc A770 (8GB) | NVIDIA RTX 3080+ / Intel Arc |
+| GPU | Intel Arc A770 (8GB) | Intel Arc |
 | Storage | 50GB SSD | 200GB NVMe |
 | Network | 1 Gbps | 10 Gbps |
 
@@ -34,7 +34,6 @@ This guide walks you through the installation, configuration, and first-run of t
 |----------|---------|---------|
 | Docker | 24.0+ | Container runtime |
 | Docker Compose | V2+ | Service orchestration |
-| NVIDIA Driver | 535+ | GPU support (if NVIDIA) |
 | Intel GPU Driver | Latest | GPU support (if Intel) |
 | Python | 3.10+ | Local development (optional) |
 
@@ -49,8 +48,6 @@ docker --version
 docker compose version
 # Expected: Docker Compose version v2.x.x
 
-# GPU availability (NVIDIA)
-nvidia-smi
 # OR for Intel
 clinfo | head -20
 ```
@@ -117,6 +114,26 @@ make up
 
 > **Note**: `make build` pulls pre-built images from Docker Hub by default. Use `REGISTRY=false` to build from source.
 
+## RTSP Stream for Live Verification
+
+To start a standalone RTSP streamer that loops video files for real-time order verification, run:
+
+```bash
+WORKERS=1 docker compose --profile parallel up -d --no-deps rtsp-streamer
+```
+
+> **Prerequisite:** Place a test video at `storage/videos/test.mp4` before starting the streamer. You can run `make download-sample-video` to get one.
+
+Once the streamer is running, the following RTSP URL becomes available:
+
+| Access From | URL |
+|-------------|-----|
+| Host machine / Gradio UI | `rtsp://localhost:8554/station_1` |
+| Other containers (internal) | `rtsp://rtsp-streamer:8554/station_1` |
+
+For multiple stations, increase `WORKERS` (e.g., `WORKERS=3`) to create `station_1`, `station_2`, and `station_3` streams.
+
+
 ---
 
 ## Configuration
@@ -346,9 +363,6 @@ docker logs oa_ovms_vlm
 # Verify model path exists
 ls -la models/vlm/
 
-# Check GPU availability
-nvidia-smi  # or clinfo for Intel
-```
 
 ### Connection Refused to OVMS
 
@@ -397,9 +411,6 @@ make down && make up
 
 **Solution**:
 ```bash
-# For NVIDIA
-nvidia-smi
-sudo systemctl restart docker
 
 # For Intel
 sudo usermod -aG render $USER
diff --git a/take-away/docs/user-guide/how-to-build-from-source.md b/take-away/docs/user-guide/how-to-build-from-source.md
index 95c608b..645e6ab 100644
--- a/take-away/docs/user-guide/how-to-build-from-source.md
+++ b/take-away/docs/user-guide/how-to-build-from-source.md
@@ -38,18 +38,6 @@ sudo apt install -y docker.io docker-compose-v2
 sudo usermod -aG docker $USER
 ```
 
-### GPU Support (Optional)
-
-```bash
-# NVIDIA Container Toolkit
-distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
-curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
-curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \
-    sudo tee /etc/apt/sources.list.d/nvidia-docker.list
-sudo apt update
-sudo apt install -y nvidia-container-toolkit
-sudo systemctl restart docker
-```
 
 ### Python Environment
 
@@ -484,10 +472,6 @@ ENV PYTHONPATH="/app:${PYTHONPATH}"
 
 **Issue**: `CUDA not available`
 
-```bash
-# Solution: Use nvidia runtime
-docker run --gpus all -it oa_service:dev
-
 # Or in compose
 services:
   order-accuracy:
diff --git a/take-away/docs/user-guide/system-requirements.md b/take-away/docs/user-guide/system-requirements.md
index 351d1bc..a18a851 100644
--- a/take-away/docs/user-guide/system-requirements.md
+++ b/take-away/docs/user-guide/system-requirements.md
@@ -25,7 +25,7 @@ Suitable for development and testing with single worker mode.
 |-----------|---------------|
 | **CPU** | Intel Xeon 8 cores @ 2.4 GHz |
 | **RAM** | 16 GB DDR4 |
-| **GPU** | Intel Arc A770 8GB / NVIDIA RTX 3060 12GB |
+| **GPU** | Intel Arc A770 8GB |
 | **Storage** | 50 GB SSD |
 | **Network** | 1 Gbps Ethernet |
 
@@ -37,7 +37,7 @@ Suitable for production with 2-4 station workers.
 |-----------|---------------|
 | **CPU** | Intel Xeon 16 cores @ 3.0 GHz |
 | **RAM** | 32 GB DDR4 |
-| **GPU** | Intel Data Center GPU Max / NVIDIA RTX 3080 10GB |
+| **GPU** | Intel Data Center GPU Max |
 | **Storage** | 200 GB NVMe SSD |
 | **Network** | 10 Gbps Ethernet |
 
@@ -49,7 +49,7 @@ Suitable for production with 8+ station workers.
 |-----------|---------------|
 | **CPU** | Intel Xeon 32+ cores @ 3.0 GHz |
 | **RAM** | 64 GB DDR4/DDR5 |
-| **GPU** | 2x NVIDIA RTX 4090 / Intel Data Center GPU Flex |
+| **GPU** | Intel Data Center GPU Flex |
 | **Storage** | 500 GB NVMe SSD RAID |
 | **Network** | 25 Gbps Ethernet |
 
@@ -72,18 +72,10 @@ Suitable for production with 8+ station workers.
 |----------|-----------------|-------------|
 | Docker Engine | 24.0.0 | 25.0+ |
 | Docker Compose | 2.20.0 | 2.24+ |
-| NVIDIA Container Toolkit | 1.14.0 | Latest |
 | containerd | 1.6.0 | 1.7+ |
 
 ### GPU Drivers
 
-#### NVIDIA
-| Driver | Minimum Version |
-|--------|-----------------|
-| NVIDIA Driver | 535.x |
-| CUDA Toolkit | 12.0 |
-| cuDNN | 8.9 |
-
 #### Intel
 | Driver | Minimum Version |
 |--------|-----------------|
@@ -146,16 +138,6 @@ sudo ufw allow 9001/tcp   # MinIO Console (admin only)
 
 ## GPU Support
 
-### NVIDIA GPUs
-
-| GPU | VRAM | Workers Supported | Notes |
-|-----|------|-------------------|-------|
-| RTX 3060 | 12 GB | 1-2 | Development |
-| RTX 3080 | 10 GB | 2-4 | Recommended |
-| RTX 4080 | 16 GB | 4-6 | High performance |
-| RTX 4090 | 24 GB | 6-8 | Best performance |
-| A100 | 40/80 GB | 10+ | Data center |
-
 ### Intel GPUs
 
 | GPU | VRAM | Workers Supported | Notes |
diff --git a/take-away/src/core/ovms_client.py b/take-away/src/core/ovms_client.py
index ab89dc4..040f377 100755
--- a/take-away/src/core/ovms_client.py
+++ b/take-away/src/core/ovms_client.py
@@ -19,8 +19,9 @@
 
 logger = logging.getLogger(__name__)
 
-# Directory for saving VLM input frames
+# Directory for saving VLM input frames (debug only)
 VLM_INPUT_DIR = os.environ.get('VLM_INPUT_DIR', '/results/vlm-in')
+SAVE_VLM_INPUT = os.environ.get('SAVE_VLM_INPUT', 'false').lower() in ('true', '1', 'yes')
 
 
 class OVMSVLMClient:
@@ -181,7 +182,7 @@ def generate(
         }
 
         # Save input frames for debugging (before sending request)
-        if unique_id:
+        if unique_id and SAVE_VLM_INPUT:
             self._save_input_frames(images, unique_id)
 
         # Send request
diff --git a/take-away/src/ocr_worker.py b/take-away/src/ocr_worker.py
index c1ac70d..e16fb74 100644
--- a/take-away/src/ocr_worker.py
+++ b/take-away/src/ocr_worker.py
@@ -8,6 +8,24 @@
 import numpy as np
 
 
+def _load_known_orders():
+    """Load valid order IDs from orders.json for OCR validation."""
+    import json
+    import os
+    orders_path = os.environ.get("ORDERS_PATH", "/config/orders.json")
+    try:
+        with open(orders_path, "r") as f:
+            data = json.load(f)
+        known = set(str(k) for k in data.keys())
+        import sys
+        print(f"[OCR-WORKER] Loaded {len(known)} known order IDs: {known}", file=sys.stderr, flush=True)
+        return known
+    except Exception as e:
+        import sys
+        print(f"[OCR-WORKER] WARNING: Could not load {orders_path}: {e}", file=sys.stderr, flush=True)
+        return set()
+
+
 def run_worker(input_queue, output_queue, models_dir):
     """
     Entry point of the OCR worker subprocess.
@@ -18,6 +36,9 @@ def run_worker(input_queue, output_queue, models_dir):
     """
     import cv2
     import easyocr
+    import sys
+
+    known_orders = _load_known_orders()
 
     try:
         reader = easyocr.Reader(
@@ -41,7 +62,6 @@ def run_worker(input_queue, output_queue, models_dir):
             order_id = None
             
             # DEBUG: Log all OCR results for troubleshooting
-            import sys
             if results:
                 print(f"[OCR-DEBUG] frame={frame_id} raw_results={results}", file=sys.stderr, flush=True)
             
@@ -49,7 +69,14 @@ def run_worker(input_queue, output_queue, models_dir):
                 if '#' in text:
                     digits = ''.join(c for c in text if c.isdigit())
                     if len(digits) >= 3:
-                        order_id = digits[:3]
+                        candidate = digits[:3]
+                        # Validate against known orders to reject misreads
+                        if known_orders and candidate not in known_orders:
+                            print(f"[OCR-VALIDATE] frame={frame_id} rejected '{candidate}' "
+                                  f"(not in known orders {known_orders})",
+                                  file=sys.stderr, flush=True)
+                            continue
+                        order_id = candidate
                         break
             output_queue.put((frame_id, order_id))
         except Exception:

From 09dfe216e061da215724223c51e6d5e61057d54b Mon Sep 17 00:00:00 2001
From: Jitendra <jitendra.kumar.saini@intel.com>
Date: Tue, 17 Mar 2026 18:10:18 +0530
Subject: [PATCH 3/4] updated the document for dine in

---
 README.md                                         |  2 +-
 dine-in/docs/user-guide/get-started.md            | 12 ++++++------
 dine-in/docs/user-guide/how-to-use-application.md |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 6425b0f..9e53e98 100755
--- a/README.md
+++ b/README.md
@@ -277,7 +277,7 @@ make down                   # Stop services
 make logs                   # View logs
 make update-submodules      # Initialize performance-tools (required before benchmarking)
 make benchmark              # Run benchmark
-make benchmark-density      # Run stream density test
+make benchmark-stream-density      # Run stream density test
 make benchmark-density-results  # View density benchmark results
 ```
 
diff --git a/dine-in/docs/user-guide/get-started.md b/dine-in/docs/user-guide/get-started.md
index 02b5f98..9f2cfd2 100644
--- a/dine-in/docs/user-guide/get-started.md
+++ b/dine-in/docs/user-guide/get-started.md
@@ -207,7 +207,7 @@ This starts 4 containers:
 To measure the maximum number of concurrent image validations the system can sustain under a latency target:
 
 ```bash
-make benchmark-density
+make benchmark-stream-density
 ```
 
 This automatically scales concurrent requests up, measuring end-to-end latency at each level, and stops when the target latency (default 15s) is exceeded. Results are saved to `./results/`.
@@ -215,7 +215,7 @@ This automatically scales concurrent requests up, measuring end-to-end latency a
 Override defaults via environment or CLI:
 
 ```bash
-make benchmark-density \
+make benchmark-stream-density \
   BENCHMARK_TARGET_LATENCY_MS=20000 \
   BENCHMARK_INIT_DURATION=30
 ```
@@ -378,7 +378,7 @@ make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600
 ### Stream Density Test
 
 ```bash
-make benchmark-density
+make benchmark-stream-density
 ```
 
 ### Stream Density Configuration
@@ -407,10 +407,10 @@ export BENCHMARK_DENSITY_INCREMENT=2
 export BENCHMARK_LATENCY_METRIC=p95
 
 # Run benchmark (uses env vars)
-make benchmark-density
+make benchmark-stream-density
 
 # Short aliases also work on the CLI:
-make benchmark-density TARGET_LATENCY_MS=20000 DENSITY_INCREMENT=2 LATENCY_METRIC=p95
+make benchmark-stream-density TARGET_LATENCY_MS=20000 DENSITY_INCREMENT=2 LATENCY_METRIC=p95
 ```
 
 **Using CLI Arguments (override env vars):**
@@ -572,7 +572,7 @@ make clean
 # Run benchmarks
 make benchmark-single IMAGE_ID=MCD-1001  # Quick single image test
 make benchmark                        # Full benchmark
-make benchmark-density                # Stream density test
+make benchmark-stream-density          # Stream density test
 
 # Development
 make shell                     # Shell into container
diff --git a/dine-in/docs/user-guide/how-to-use-application.md b/dine-in/docs/user-guide/how-to-use-application.md
index 7809ad6..fbda5e8 100644
--- a/dine-in/docs/user-guide/how-to-use-application.md
+++ b/dine-in/docs/user-guide/how-to-use-application.md
@@ -199,7 +199,7 @@ make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 BENCHMARK_TARGET_DEVIC
 Tests maximum concurrent validations within latency target.
 
 ```bash
-make benchmark-density
+make benchmark-stream-density
 ```
 
 Output:

From ab1b915528993935499257d928acfd9ea23e0058 Mon Sep 17 00:00:00 2001
From: Sachin Sharma <sachin.kumar.sharma@intel.com>
Date: Tue, 17 Mar 2026 20:44:18 +0530
Subject: [PATCH 4/4] updated doc for device usage

---
 dine-in/.env.example                          |  2 +-
 dine-in/Makefile                              | 12 ++-
 dine-in/docker-compose.yml                    |  6 +-
 dine-in/docs/user-guide/get-started.md        | 10 ++-
 .../docs/user-guide/how-to-use-application.md | 10 ++-
 ovms-service/setup_models.sh                  | 80 ++++++++++++++++---
 take-away/.env.example                        |  3 +-
 take-away/Makefile                            | 15 ++--
 take-away/docker-compose.yaml                 |  8 +-
 .../docs/user-guide/benchmarking-guide.md     |  8 +-
 take-away/docs/user-guide/get-started.md      | 10 ++-
 11 files changed, 128 insertions(+), 36 deletions(-)

diff --git a/dine-in/.env.example b/dine-in/.env.example
index 4c4f6ee..4b0e108 100755
--- a/dine-in/.env.example
+++ b/dine-in/.env.example
@@ -19,7 +19,7 @@ LOG_LEVEL=INFO
 OVMS_ENDPOINT=http://ovms-vlm:8000
 OVMS_MODEL_NAME=Qwen/Qwen2.5-VL-7B-Instruct
 VLM_PRECISION=int8
-VLM_DEVICE=GPU
+TARGET_DEVICE=GPU
 SEMANTIC_SERVICE_ENDPOINT=http://semantic-service:8080
 API_TIMEOUT=60
 
diff --git a/dine-in/Makefile b/dine-in/Makefile
index 85a7c78..bbe8894 100755
--- a/dine-in/Makefile
+++ b/dine-in/Makefile
@@ -5,6 +5,12 @@
 # Dine-In Order Accuracy - Makefile
 # =============================================================================
 
+# =============================================================================
+# Load Environment Variables from .env file
+# =============================================================================
+-include .env
+export
+
 # Worker configuration for stream density testing
 WORKERS ?= 1
 ITERATIONS ?= 0
@@ -80,7 +86,7 @@ BENCHMARK_WORKERS ?= 1
 BENCHMARK_ITERATIONS ?= 1
 BENCHMARK_DURATION ?= 180
 BENCHMARK_TARGET_FPS ?= 15.0
-BENCHMARK_TARGET_DEVICE ?= GPU
+TARGET_DEVICE ?= GPU
 
 # Colors for output
 RED := \033[0;31m
@@ -393,7 +399,7 @@ benchmark:
 	@echo "╚═══════════════════════════════════════════════════════════════════╝"
 	@echo "Workers: $(BENCHMARK_WORKERS)"
 	@echo "Iterations: $(BENCHMARK_ITERATIONS)"
-	@echo "Target Device: $(BENCHMARK_TARGET_DEVICE)"
+	@echo "Target Device: $(TARGET_DEVICE)"
 	@echo "Results Dir: $(RESULTS_DIR)"
 	@echo ""
 	mkdir -p $(RESULTS_DIR)
@@ -410,7 +416,7 @@ benchmark:
 			--init_duration $(BENCHMARK_INIT_DURATION) \
 			--duration $(BENCHMARK_DURATION) \
 			--results_dir $(CURDIR)/$(RESULTS_DIR) \
-			--target_device $(BENCHMARK_TARGET_DEVICE) \
+			--target_device $(TARGET_DEVICE) \
 			--skip_perf_tools; \
 		deactivate \
 	)
diff --git a/dine-in/docker-compose.yml b/dine-in/docker-compose.yml
index 1db7168..910757f 100644
--- a/dine-in/docker-compose.yml
+++ b/dine-in/docker-compose.yml
@@ -49,7 +49,7 @@ services:
       - OVMS_ENDPOINT=${OVMS_ENDPOINT}
       - OVMS_MODEL_NAME=${OVMS_MODEL_NAME}
       - VLM_PRECISION=${VLM_PRECISION}
-      - VLM_DEVICE=${VLM_DEVICE}
+      - VLM_DEVICE=${TARGET_DEVICE:-GPU}
       - CACHE_ENABLED=true
       - CACHE_BACKEND=memory
       - PROMETHEUS_ENABLED=true
@@ -97,7 +97,7 @@ services:
       - OVMS_ENDPOINT=${OVMS_ENDPOINT}
       - OVMS_MODEL_NAME=${OVMS_MODEL_NAME}
       - VLM_PRECISION=${VLM_PRECISION}
-      - VLM_DEVICE=${VLM_DEVICE}
+      - VLM_DEVICE=${TARGET_DEVICE:-GPU}
       - METRICS_COLLECTOR_ENDPOINT=http://metrics-collector:8084
       - CONTAINER_RESULTS_PATH=/app/results
       - USECASE_1=dine-in-order-accuracy
@@ -147,7 +147,7 @@ services:
       - OVMS_ENDPOINT=${OVMS_ENDPOINT}
       - OVMS_MODEL_NAME=${OVMS_MODEL_NAME}
       - VLM_PRECISION=${VLM_PRECISION}
-      - VLM_DEVICE=${VLM_DEVICE}
+      - VLM_DEVICE=${TARGET_DEVICE:-GPU}
       - CONTAINER_RESULTS_PATH=/app/results
       - USECASE_1=dine-in-order-accuracy
       - NO_PROXY=localhost,127.0.0.1,ovms-vlm,semantic-service,host.docker.internal
diff --git a/dine-in/docs/user-guide/get-started.md b/dine-in/docs/user-guide/get-started.md
index 9f2cfd2..b5e263b 100644
--- a/dine-in/docs/user-guide/get-started.md
+++ b/dine-in/docs/user-guide/get-started.md
@@ -2,6 +2,12 @@
 
 This guide walks you through the installation, configuration, and first-run of the Dine-In Order Accuracy system for image-based plate validation.
 
+> **Note — `TARGET_DEVICE`**: To change the inference device mode, set `TARGET_DEVICE` in your `.env` file to `GPU`, `CPU`, or `AUTO`. After changing the device, re-run the setup script to update the model config:
+> ```bash
+> cd ../ovms-service && ./setup_models.sh --app dine-in
+> ```
+> You can also pass the device explicitly: `./setup_models.sh --device CPU`
+
 ---
 
 ## Table of Contents
@@ -366,13 +372,13 @@ Configuration options:
 |----------|---------|-------------|
 | `BENCHMARK_WORKERS` | 1 | Number of concurrent workers |
 | `BENCHMARK_DURATION` | 180 | Benchmark duration (seconds) |
-| `BENCHMARK_TARGET_DEVICE` | GPU | Target device: CPU, GPU, NPU |
+| `TARGET_DEVICE` | GPU | Target device: CPU, GPU, NPU |
 | `RESULTS_DIR` | results | Output directory |
 
 Example with custom settings:
 
 ```bash
-make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600
+make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 TARGET_DEVICE=GPU
 ```
 
 ### Stream Density Test
diff --git a/dine-in/docs/user-guide/how-to-use-application.md b/dine-in/docs/user-guide/how-to-use-application.md
index fbda5e8..99fe1f4 100644
--- a/dine-in/docs/user-guide/how-to-use-application.md
+++ b/dine-in/docs/user-guide/how-to-use-application.md
@@ -2,6 +2,12 @@
 
 Guide to using the Dine-In Order Accuracy application features.
 
+> **Note — `TARGET_DEVICE`**: To change the inference device mode, set `TARGET_DEVICE` in your `.env` file to `GPU`, `CPU`, or `AUTO`. After changing the device, re-run the setup script to update the model config:
+> ```bash
+> cd ../ovms-service && ./setup_models.sh --app dine-in
+> ```
+> You can also pass the device explicitly: `./setup_models.sh --device CPU`
+
 ## Gradio UI
 
 Access the web interface at http://localhost:7861
@@ -185,13 +191,13 @@ Configuration options:
 | `BENCHMARK_INIT_DURATION` | 60 | Warmup time (seconds) |
 | `BENCHMARK_MIN_REQUESTS` | 3 | Min requests before measuring |
 | `BENCHMARK_REQUEST_TIMEOUT` | 300 | Request timeout (seconds) |
-| `BENCHMARK_TARGET_DEVICE` | GPU | Target device: CPU, GPU, NPU |
+| `TARGET_DEVICE` | GPU | Target device: CPU, GPU, NPU |
 | `RESULTS_DIR` | results | Output directory |
 | `REGISTRY` | false | Use registry images (true/false) |
 
 Example:
 ```bash
-make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 BENCHMARK_TARGET_DEVICE=GPU
+make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 TARGET_DEVICE=GPU
 ```
 
 ### Stream Density Test
diff --git a/ovms-service/setup_models.sh b/ovms-service/setup_models.sh
index 65c26f9..b2fcd97 100755
--- a/ovms-service/setup_models.sh
+++ b/ovms-service/setup_models.sh
@@ -6,6 +6,32 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 PROJECT_ROOT="$(dirname "${SCRIPT_DIR}")"
 MODELS_DIR="${SCRIPT_DIR}/models"
 
+###############################################
+# TARGET DEVICE CONFIGURATION
+# Override via: TARGET_DEVICE=CPU ./setup_models.sh
+# or:           ./setup_models.sh --device CPU
+###############################################
+# Parse CLI flags: --device <DEV> and --app <dine-in|take-away>
+for arg in "$@"; do
+    case "$arg" in
+        --device=*) TARGET_DEVICE="${arg#*=}"; TARGET_DEVICE_FROM_CLI=true ;;
+        --device)   _shift_device=true ;;
+        --app=*)    SETUP_APP="${arg#*=}" ;;
+        --app)      _shift_app=true ;;
+        *)
+            if [ "${_shift_device:-}" = true ]; then
+                TARGET_DEVICE="$arg"; TARGET_DEVICE_FROM_CLI=true; _shift_device=false
+            elif [ "${_shift_app:-}" = true ]; then
+                SETUP_APP="$arg"; _shift_app=false
+            fi
+            ;;
+    esac
+done
+# If set via environment variable (not CLI), mark it so .env doesn't override
+if [ -n "${TARGET_DEVICE:-}" ] && [ -z "${TARGET_DEVICE_FROM_CLI:-}" ]; then
+    TARGET_DEVICE_FROM_CLI=true
+fi
+
 ###############################################
 # HARD CODED MODEL REGISTRY
 ###############################################
@@ -19,15 +45,49 @@ POTENTIAL_SOURCE_DIRS=(
 )
 
 ###############################################
-# LOAD OVMS_MODEL_NAME FROM take-away/.env
+# LOAD OVMS_MODEL_NAME AND TARGET_DEVICE FROM .env
 ###############################################
-ENV_FILE="${PROJECT_ROOT}/take-away/.env"
-if [ -f "${ENV_FILE}" ]; then
-    OVMS_MODEL_NAME_ENV=$(grep -E '^OVMS_MODEL_NAME=' "${ENV_FILE}" | head -1 | cut -d'=' -f2- | tr -d '"\r')
+# Determine which .env file(s) to read based on --app flag
+if [ "${SETUP_APP:-}" = "take-away" ]; then
+    _env_files=("${PROJECT_ROOT}/take-away/.env")
+elif [ "${SETUP_APP:-}" = "dine-in" ]; then
+    _env_files=("${PROJECT_ROOT}/dine-in/.env")
+else
+    # No --app specified: check both (take-away first, then dine-in)
+    _env_files=("${PROJECT_ROOT}/take-away/.env" "${PROJECT_ROOT}/dine-in/.env")
 fi
+
+TARGET_DEVICE_SOURCE=""
+for _env_file in "${_env_files[@]}"; do
+    if [ -f "${_env_file}" ]; then
+        _model=$(grep -E '^OVMS_MODEL_NAME=' "${_env_file}" 2>/dev/null | head -1 | cut -d'=' -f2- | sed 's/[[:space:]]#.*$//' | tr -d "\"'[:space:]")
+        [ -n "${_model}" ] && OVMS_MODEL_NAME_ENV="${_model}"
+        # Read TARGET_DEVICE from .env if not already set via CLI/env; inline " # comments", quotes and whitespace/CR are stripped
+        if [ -z "${TARGET_DEVICE_FROM_CLI:-}" ]; then
+            _device=$(grep -E '^TARGET_DEVICE=' "${_env_file}" 2>/dev/null | head -1 | cut -d'=' -f2- | sed 's/[[:space:]]#.*$//' | tr -d "\"'[:space:]")
+            if [ -n "${_device}" ]; then
+                TARGET_DEVICE="${_device}"
+                TARGET_DEVICE_SOURCE="${_env_file}"
+            fi
+        fi
+    fi
+done
+
+TARGET_DEVICE="${TARGET_DEVICE:-GPU}"
+
 # Fall back to the hard-coded source model if .env is missing or unset
 OVMS_MODEL_NAME_ENV="${OVMS_MODEL_NAME_ENV:-Qwen/Qwen2.5-VL-7B-Instruct}"
 
+# Print source of TARGET_DEVICE so user knows exactly where it came from
+if [ "${TARGET_DEVICE_FROM_CLI:-}" = true ]; then
+    echo "Target device: ${TARGET_DEVICE}  (from CLI / environment variable)"
+elif [ -n "${TARGET_DEVICE_SOURCE}" ]; then
+    echo "Target device: ${TARGET_DEVICE}  (from ${TARGET_DEVICE_SOURCE})"
+else
+    echo "Target device: ${TARGET_DEVICE}  (default — TARGET_DEVICE not set via CLI, environment, or .env)"
+fi
+echo ""
+
 ###############################################
 echo "=========================================="
 echo "OVMS Model Setup for Order Accuracy"
@@ -135,7 +195,7 @@ export_model() {
       --source_model "${SOURCE_MODEL}" \
       --weight-format int8 \
       --pipeline_type VLM_CB \
-      --target_device GPU \
+      --target_device "${TARGET_DEVICE}" \
       --cache_size 32 \
       --max_num_seqs 1 \
       --enable_prefix_caching \
@@ -241,7 +301,9 @@ apply_graph_config() {
     local GRAPH_FILE="${MODELS_DIR}/Qwen/${MODEL_NAME}/graph.pbtxt"
 
     if [ ! -f "${GRAPH_OPTIONS_FILE}" ]; then
-        echo "  No graph_options.json found, keeping existing graph.pbtxt"
+        [ -f "${GRAPH_FILE}" ] || { echo "  No graph_options.json or graph.pbtxt found, nothing to update"; return 0; }
+        sed -i "s/device: \"[^\"]*\"/device: \"${TARGET_DEVICE}\"/g" "${GRAPH_FILE}"
+        echo "  ✓ No graph_options.json found, graph.pbtxt device set to ${TARGET_DEVICE}"
         return 0
     fi
 
@@ -250,8 +312,8 @@ apply_graph_config() {
     echo "Applying graph_options.json to graph.pbtxt"
     echo "------------------------------------------"
 
-    python3 - "${GRAPH_OPTIONS_FILE}" "${GRAPH_FILE}" << 'PYEOF'
-import json, sys
+    TARGET_DEVICE="${TARGET_DEVICE}" python3 - "${GRAPH_OPTIONS_FILE}" "${GRAPH_FILE}" << 'PYEOF'
+import json, sys, os
 
 graph_options_file = sys.argv[1]
 graph_file = sys.argv[2]
@@ -269,7 +331,7 @@ dynamic_split = 'true' if opts.get('dynamic_split_fuse', False) else 'false'
 max_num_seqs = opts.get('max_num_seqs', 4)
 cache_size = opts.get('cache_size', 10)
 max_num_batched_tokens = opts.get('max_num_batched_tokens', 4096)
-device = opts.get('device', 'GPU')
+device = opts.get('device', os.environ.get('TARGET_DEVICE', 'GPU'))
 
 graph = f'''input_stream: "HTTP_REQUEST_PAYLOAD:input"
 output_stream: "HTTP_RESPONSE_PAYLOAD:output"
diff --git a/take-away/.env.example b/take-away/.env.example
index a0ea44a..f510cf1 100644
--- a/take-away/.env.example
+++ b/take-away/.env.example
@@ -30,11 +30,10 @@ VLM_BACKEND=ovms
 OVMS_ENDPOINT=http://ovms-vlm:8000
 OVMS_MODEL_NAME=Qwen/Qwen2.5-VL-7B-Instruct
 VLM_PRECISION=int8
-VLM_DEVICE=GPU
+TARGET_DEVICE=GPU
 OVMS_TIMEOUT=120
 
 # OpenVINO local settings (when VLM_BACKEND=openvino)
-OPENVINO_DEVICE=GPU
 VLM_MODEL_PATH=/model/Qwen2.5-VL-7B-Instruct
 
 # -----------------------------------------------------------------------------
diff --git a/take-away/Makefile b/take-away/Makefile
index b1db282..3c79105 100644
--- a/take-away/Makefile
+++ b/take-away/Makefile
@@ -32,7 +32,8 @@ SCALING_MODE ?= fixed
 VLM_BACKEND ?= ovms
 OVMS_ENDPOINT ?= http://ovms-vlm:8000
 OVMS_MODEL_NAME ?= Qwen/Qwen2.5-VL-7B-Instruct
-OPENVINO_DEVICE ?= GPU
+TARGET_DEVICE ?= GPU
+OPENVINO_DEVICE ?= $(TARGET_DEVICE)
 
 # Semantic Service
 SEMANTIC_VLM_BACKEND ?= ovms
@@ -131,7 +132,7 @@ download-sample-video: setup-dirs
 
 # Required variables for specific modes
 REQUIRED_OVMS_VARS := OVMS_ENDPOINT OVMS_MODEL_NAME
-REQUIRED_OPENVINO_VARS := OPENVINO_DEVICE VLM_MODEL_PATH
+REQUIRED_OPENVINO_VARS := TARGET_DEVICE VLM_MODEL_PATH
 
 check-env:
 	@echo "$(BLUE)Checking environment configuration...$(NC)"
@@ -230,7 +231,7 @@ show-config:
 		echo "  OVMS_MODEL_NAME      = $(OVMS_MODEL_NAME)"; \
 	else \
 		echo "  VLM_MODEL_PATH       = $(VLM_MODEL_PATH)"; \
-		echo "  OPENVINO_DEVICE      = $(OPENVINO_DEVICE)"; \
+		echo "  TARGET_DEVICE        = $(TARGET_DEVICE)"; \
 	fi
 	@echo ""
 	@echo "$(YELLOW)Semantic Service:$(NC)"
@@ -426,7 +427,7 @@ benchmark: setup-dirs ## Run Order Accuracy benchmark (uses benchmark_order_accu
 	@echo "Workers: $(BENCHMARK_WORKERS)"
 	@echo "Duration: $(BENCHMARK_DURATION)s"
 	@echo "Init Duration: $(BENCHMARK_INIT_DURATION)s"
-	@echo "Target Device: $(OPENVINO_DEVICE)"
+	@echo "Target Device: $(TARGET_DEVICE)"
 	@echo ""
 	cd $(PERF_TOOLS_DIR) && \
 	( \
@@ -440,7 +441,7 @@ benchmark: setup-dirs ## Run Order Accuracy benchmark (uses benchmark_order_accu
 			--init_duration $(BENCHMARK_INIT_DURATION) \
 			--profile benchmark \
 			--results_dir $(CURDIR)/$(RESULTS_DIR) \
-			--target_device $(OPENVINO_DEVICE) \
+			--target_device $(TARGET_DEVICE) \
 			--skip_perf_tools \
 			--skip_export; \
 		deactivate \
@@ -480,7 +481,7 @@ benchmark-oa: setup-dirs ## Run Order Accuracy benchmark with fixed workers
 	@echo "Workers: $(BENCHMARK_WORKERS)"
 	@echo "Duration: $(BENCHMARK_DURATION)s"
 	@echo "Init Duration: $(BENCHMARK_INIT_DURATION)s"
-	@echo "Target Device: $(OPENVINO_DEVICE)"
+	@echo "Target Device: $(TARGET_DEVICE)"
 	@echo ""
 	cd $(PERF_TOOLS_DIR) && \
 	( \
@@ -494,7 +495,7 @@ benchmark-oa: setup-dirs ## Run Order Accuracy benchmark with fixed workers
 			--init_duration $(BENCHMARK_INIT_DURATION) \
 			--profile benchmark \
 			--results_dir $(CURDIR)/$(RESULTS_DIR) \
-			--target_device $(OPENVINO_DEVICE) \
+			--target_device $(TARGET_DEVICE) \
 			--skip_perf_tools \
 			--skip_export; \
 		deactivate \
diff --git a/take-away/docker-compose.yaml b/take-away/docker-compose.yaml
index 3f20e4d..c9a7968 100755
--- a/take-away/docker-compose.yaml
+++ b/take-away/docker-compose.yaml
@@ -103,13 +103,13 @@ services:
       # VLM Backend (embedded or ovms)
       VLM_BACKEND: ${VLM_BACKEND:-ovms}
       VLM_MODEL_PATH: ${VLM_MODEL_PATH:-/model/Qwen2.5-VL-7B-Instruct}
-      OPENVINO_DEVICE: ${OPENVINO_DEVICE:-GPU}
+      OPENVINO_DEVICE: ${TARGET_DEVICE:-GPU}
       
       # OVMS settings (when VLM_BACKEND=ovms)
       OVMS_ENDPOINT: ${OVMS_ENDPOINT:-http://ovms-vlm:8000}
       OVMS_MODEL_NAME: ${OVMS_MODEL_NAME:-Qwen/Qwen2.5-VL-7B-Instruct}
       VLM_PRECISION: ${VLM_PRECISION:-int8}
-      VLM_DEVICE: ${VLM_DEVICE:-GPU}
+      VLM_DEVICE: ${TARGET_DEVICE:-GPU}
       OVMS_TIMEOUT: ${OVMS_TIMEOUT:-120}
       
       # Semantic service
@@ -281,11 +281,11 @@ services:
       - OVMS_ENDPOINT=http://ovms-vlm:8000
       - OVMS_MODEL_NAME=${OVMS_MODEL_NAME:-Qwen/Qwen2.5-VL-7B-Instruct}
       - VLM_PRECISION=${VLM_PRECISION:-int8}
-      - VLM_DEVICE=${VLM_DEVICE:-GPU}
+      - VLM_DEVICE=${TARGET_DEVICE:-GPU}
       - OVMS_TIMEOUT=${OVMS_TIMEOUT:-60}
       # OpenVINO settings (when VLM_BACKEND=openvino)
       - OPENVINO_MODEL_PATH=${OPENVINO_MODEL_PATH:-/models/Qwen2.5-VL-7B-Instruct}
-      - OPENVINO_DEVICE=${OPENVINO_DEVICE:-GPU}
+      - OPENVINO_DEVICE=${TARGET_DEVICE:-GPU}
       # Cache settings
       - CACHE_ENABLED=true
       - CACHE_BACKEND=memory
diff --git a/take-away/docs/user-guide/benchmarking-guide.md b/take-away/docs/user-guide/benchmarking-guide.md
index f0d679d..6c05dc2 100644
--- a/take-away/docs/user-guide/benchmarking-guide.md
+++ b/take-away/docs/user-guide/benchmarking-guide.md
@@ -2,6 +2,12 @@
 
 This guide covers performance testing, stream density benchmarking, and metrics collection for the Take-Away Order Accuracy system.
 
+> **Note — `TARGET_DEVICE`**: To change the inference device, set `TARGET_DEVICE` in your `.env` file to `GPU`, `CPU`, or `AUTO`. After changing it, re-run the setup script so that the model configuration is updated for the new device:
+> ```bash
+> cd ../ovms-service && ./setup_models.sh --app take-away
+> ```
+> You can also pass the device explicitly on the command line, which takes precedence over the `.env` value: `./setup_models.sh --device CPU`
+
 ---
 
 ## Table of Contents
@@ -562,7 +568,7 @@ VLM_BATCH_SIZE=4
 VLM_BATCH_TIMEOUT_MS=100
 
 # For memory-constrained systems
-OPENVINO_DEVICE=CPU
+TARGET_DEVICE=CPU
 VLM_MAX_CONCURRENT=2
 ```
 
diff --git a/take-away/docs/user-guide/get-started.md b/take-away/docs/user-guide/get-started.md
index 7e6f557..6b603ea 100644
--- a/take-away/docs/user-guide/get-started.md
+++ b/take-away/docs/user-guide/get-started.md
@@ -2,6 +2,12 @@
 
 This guide walks you through the installation, configuration, and first-run of the Take-Away Order Accuracy system.
 
+> **Note — `TARGET_DEVICE`**: To change the inference device, set `TARGET_DEVICE` in your `.env` file to `GPU`, `CPU`, or `AUTO`. After changing it, re-run the setup script so that the model configuration is updated for the new device:
+> ```bash
+> cd ../ovms-service && ./setup_models.sh --app take-away
+> ```
+> You can also pass the device explicitly on the command line, which takes precedence over the `.env` value: `./setup_models.sh --device CPU`
+
 ---
 
 ## Table of Contents
@@ -147,7 +153,7 @@ For multiple stations, increase `WORKERS` (e.g., `WORKERS=3`) to create `station
 VLM_BACKEND=ovms
 OVMS_ENDPOINT=http://ovms-vlm:8000
 OVMS_MODEL_NAME=Qwen/Qwen2.5-VL-7B-Instruct
-OPENVINO_DEVICE=GPU          # 'GPU', 'CPU', or 'AUTO'
+TARGET_DEVICE=GPU            # 'GPU', 'CPU', or 'AUTO'
 
 # =============================================================================
 # Semantic Service
@@ -399,7 +405,7 @@ make up
 export VLM_BATCH_SIZE=1
 
 # Use CPU instead of GPU (slower but less memory)
-export OPENVINO_DEVICE=CPU
+export TARGET_DEVICE=CPU
 
 # Restart services
 make down && make up