Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ make down # Stop services
make logs # View logs
make update-submodules # Initialize performance-tools (required before benchmarking)
make benchmark # Run benchmark
make benchmark-density # Run stream density test
make benchmark-stream-density # Run stream density test
make benchmark-density-results # View density benchmark results
```

Expand Down
2 changes: 1 addition & 1 deletion dine-in/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ LOG_LEVEL=INFO
OVMS_ENDPOINT=http://ovms-vlm:8000
OVMS_MODEL_NAME=Qwen/Qwen2.5-VL-7B-Instruct
VLM_PRECISION=int8
VLM_DEVICE=GPU
TARGET_DEVICE=GPU
SEMANTIC_SERVICE_ENDPOINT=http://semantic-service:8080
API_TIMEOUT=60

Expand Down
21 changes: 12 additions & 9 deletions dine-in/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
# Dine-In Order Accuracy - Makefile
# =============================================================================

# =============================================================================
# Load Environment Variables from .env file
# =============================================================================
-include .env
export

# Worker configuration for stream density testing
WORKERS ?= 1
ITERATIONS ?= 0
Expand All @@ -17,7 +23,7 @@ CONCURRENCY_MAX ?= 10
REQUESTS_PER_LEVEL ?= 10
REQUEST_TIMEOUT ?= 30
RESULTS_DIR ?= results
METRICS_DIR ?= metrics
METRICS_DIR ?= results
OOM_PROTECTION ?= 1

# Performance tools path
Expand Down Expand Up @@ -80,7 +86,7 @@ BENCHMARK_WORKERS ?= 1
BENCHMARK_ITERATIONS ?= 1
BENCHMARK_DURATION ?= 180
BENCHMARK_TARGET_FPS ?= 15.0
BENCHMARK_TARGET_DEVICE ?= GPU
TARGET_DEVICE ?= GPU

# Colors for output
RED := \033[0;31m
Expand Down Expand Up @@ -393,7 +399,7 @@ benchmark:
@echo "╚═══════════════════════════════════════════════════════════════════╝"
@echo "Workers: $(BENCHMARK_WORKERS)"
@echo "Iterations: $(BENCHMARK_ITERATIONS)"
@echo "Target Device: $(BENCHMARK_TARGET_DEVICE)"
@echo "Target Device: $(TARGET_DEVICE)"
@echo "Results Dir: $(RESULTS_DIR)"
@echo ""
mkdir -p $(RESULTS_DIR)
Expand All @@ -410,7 +416,7 @@ benchmark:
--init_duration $(BENCHMARK_INIT_DURATION) \
--duration $(BENCHMARK_DURATION) \
--results_dir $(CURDIR)/$(RESULTS_DIR) \
--target_device $(BENCHMARK_TARGET_DEVICE) \
--target_device $(TARGET_DEVICE) \
--skip_perf_tools; \
deactivate \
)
Expand All @@ -431,10 +437,7 @@ benchmark-metrics:
echo "No benchmark metrics found. Run 'make benchmark' first."; \
fi

benchmark-stream-density: benchmark-density
@echo "Note: benchmark-stream-density is deprecated, use 'make benchmark-density' instead"

benchmark-density: ## Run Dine-In stream density benchmark (image-based latency)
benchmark-stream-density: ## Run Dine-In stream density benchmark (image-based latency)
@if [ "$(OOM_PROTECTION)" = "0" ]; then \
echo "╔════════════════════════════════════════════════════════════╗"; \
echo "║ WARNING ║"; \
Expand Down Expand Up @@ -558,7 +561,7 @@ plot-metrics: ## Generate plots from benchmark metrics
@echo "╔═══════════════════════════════════════════════════════════════════╗"
@echo "║ Generating Metrics Plots ║"
@echo "╚═══════════════════════════════════════════════════════════════════╝"
@sudo chmod -R ugo+rw $(CURDIR)/$(METRICS_DIR) 2>/dev/null || true
@chmod -R ugo+rw $(CURDIR)/$(METRICS_DIR) 2>/dev/null || true
cd $(PERF_TOOLS_DIR) && \
( \
python3 -m venv venv && \
Expand Down
2 changes: 1 addition & 1 deletion dine-in/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
### Prerequisites

- Docker 24.0+ with Compose V2
- NVIDIA GPU with 8GB+ VRAM (or Intel GPU)
- Intel GPU
- 32GB+ RAM recommended
- Intel Xeon or equivalent CPU

Expand Down
8 changes: 4 additions & 4 deletions dine-in/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ services:
- OVMS_ENDPOINT=${OVMS_ENDPOINT}
- OVMS_MODEL_NAME=${OVMS_MODEL_NAME}
- VLM_PRECISION=${VLM_PRECISION}
- VLM_DEVICE=${VLM_DEVICE}
- VLM_DEVICE=${TARGET_DEVICE:-GPU}
- CACHE_ENABLED=true
- CACHE_BACKEND=memory
- PROMETHEUS_ENABLED=true
Expand Down Expand Up @@ -97,7 +97,7 @@ services:
- OVMS_ENDPOINT=${OVMS_ENDPOINT}
- OVMS_MODEL_NAME=${OVMS_MODEL_NAME}
- VLM_PRECISION=${VLM_PRECISION}
- VLM_DEVICE=${VLM_DEVICE}
- VLM_DEVICE=${TARGET_DEVICE:-GPU}
- METRICS_COLLECTOR_ENDPOINT=http://metrics-collector:8084
- CONTAINER_RESULTS_PATH=/app/results
- USECASE_1=dine-in-order-accuracy
Expand Down Expand Up @@ -147,7 +147,7 @@ services:
- OVMS_ENDPOINT=${OVMS_ENDPOINT}
- OVMS_MODEL_NAME=${OVMS_MODEL_NAME}
- VLM_PRECISION=${VLM_PRECISION}
- VLM_DEVICE=${VLM_DEVICE}
- VLM_DEVICE=${TARGET_DEVICE:-GPU}
- CONTAINER_RESULTS_PATH=/app/results
- USECASE_1=dine-in-order-accuracy
- NO_PROXY=localhost,127.0.0.1,ovms-vlm,semantic-service,host.docker.internal
Expand Down Expand Up @@ -180,7 +180,7 @@ services:
- METRICS_DIR=/tmp/results
- DEVICE_ENV_PATH=/configs/device.env
volumes:
- ./metrics:/tmp/results
- ./results:/tmp/results
- ./configs:/configs
- /tmp/.X11-unix:/tmp/.X11-unix
- /sys/devices:/sys/devices
Expand Down
37 changes: 17 additions & 20 deletions dine-in/docs/user-guide/get-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

This guide walks you through the installation, configuration, and first-run of the Dine-In Order Accuracy system for image-based plate validation.

> **Note — `TARGET_DEVICE`**: To change the inference device mode, set `TARGET_DEVICE` in your `.env` file to `GPU`, `CPU`, or `AUTO`. After changing the device, re-run the setup script to update the model config:
> ```bash
> cd ../ovms-service && ./setup_models.sh --app dine-in
> ```
> You can also pass the device explicitly: `./setup_models.sh --device CPU`

---

## Table of Contents
Expand All @@ -24,7 +30,7 @@ This guide walks you through the installation, configuration, and first-run of t
|-----------|---------|-------------|
| CPU | Intel Xeon 8 cores | Intel Xeon 16+ cores |
| RAM | 16GB | 32GB+ |
| GPU | Intel Arc A770 (8GB) | Intel Arc / NVIDIA RTX 3080+ |
| GPU | Intel Arc A770 (8GB) | Intel Arc |
| Storage | 50GB SSD | 200GB NVMe |
| Network | 1 Gbps | 10 Gbps |

Expand All @@ -34,7 +40,6 @@ This guide walks you through the installation, configuration, and first-run of t
|----------|---------|---------|
| Docker | 24.0+ | Container runtime |
| Docker Compose | V2+ | Service orchestration |
| NVIDIA Driver | 535+ | GPU support (if NVIDIA) |
| Intel GPU Driver | Latest | GPU support (if Intel) |
| Python | 3.10+ | Local development (optional) |

Expand All @@ -49,10 +54,6 @@ docker --version
docker compose version
# Expected: Docker Compose version v2.x.x

# GPU availability (NVIDIA)
nvidia-smi
# OR for Intel
clinfo | head -20
```

---
Expand Down Expand Up @@ -212,15 +213,15 @@ This starts 4 containers:
To measure the maximum number of concurrent image validations the system can sustain under a latency target:

```bash
make benchmark-density
make benchmark-stream-density
```

This automatically scales concurrent requests up, measuring end-to-end latency at each level, and stops when the target latency (default 15s) is exceeded. Results are saved to `./results/`.

Override defaults via environment or CLI:

```bash
make benchmark-density \
make benchmark-stream-density \
BENCHMARK_TARGET_LATENCY_MS=20000 \
BENCHMARK_INIT_DURATION=30
```
Expand Down Expand Up @@ -371,19 +372,19 @@ Configuration options:
|----------|---------|-------------|
| `BENCHMARK_WORKERS` | 1 | Number of concurrent workers |
| `BENCHMARK_DURATION` | 180 | Benchmark duration (seconds) |
| `BENCHMARK_TARGET_DEVICE` | GPU | Target device: CPU, GPU, NPU |
| `TARGET_DEVICE` | GPU | Target device: CPU, GPU, NPU |
| `RESULTS_DIR` | results | Output directory |

Example with custom settings:

```bash
make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600
make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 TARGET_DEVICE=GPU
```

### Stream Density Test

```bash
make benchmark-density
make benchmark-stream-density
```

### Stream Density Configuration
Expand Down Expand Up @@ -412,10 +413,10 @@ export BENCHMARK_DENSITY_INCREMENT=2
export BENCHMARK_LATENCY_METRIC=p95

# Run benchmark (uses env vars)
make benchmark-density
make benchmark-stream-density

# Short aliases also work on the CLI:
make benchmark-density TARGET_LATENCY_MS=20000 DENSITY_INCREMENT=2 LATENCY_METRIC=p95
make benchmark-stream-density TARGET_LATENCY_MS=20000 DENSITY_INCREMENT=2 LATENCY_METRIC=p95
```

**Using CLI Arguments (override env vars):**
Expand Down Expand Up @@ -447,8 +448,7 @@ docker logs dinein_ovms_vlm
ls -la ../ovms-service/models/

# Check GPU availability
clinfo | head -20 # Intel
nvidia-smi # NVIDIA
clinfo | head -20 # Intel
```

### Connection Refused to OVMS
Expand Down Expand Up @@ -486,7 +486,7 @@ netstat -tulpn | grep -E "7861|8083|8002|8081"

**Solution**:
- Ensure GPU drivers are installed
- Check GPU utilization: `intel_gpu_top` or `nvidia-smi`
- Check GPU utilization: `intel_gpu_top`
- Verify OVMS is using GPU in logs: `docker logs dinein_ovms_vlm | grep -i gpu`
- Consider reducing image resolution in preprocessing

Expand Down Expand Up @@ -522,9 +522,6 @@ sudo usermod -aG video $USER
# Verify GPU access
ls -la /dev/dri/

# For NVIDIA
nvidia-smi
sudo systemctl restart docker
```

### No Scenarios Available in UI
Expand Down Expand Up @@ -581,7 +578,7 @@ make clean
# Run benchmarks
make benchmark-single IMAGE_ID=MCD-1001 # Quick single image test
make benchmark # Full benchmark
make benchmark-density # Stream density test
make benchmark-stream-density # Stream density test

# Development
make shell # Shell into container
Expand Down
12 changes: 9 additions & 3 deletions dine-in/docs/user-guide/how-to-use-application.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

Guide to using the Dine-In Order Accuracy application features.

> **Note — `TARGET_DEVICE`**: To change the inference device mode, set `TARGET_DEVICE` in your `.env` file to `GPU`, `CPU`, or `AUTO`. After changing the device, re-run the setup script to update the model config:
> ```bash
> cd ../ovms-service && ./setup_models.sh --app dine-in
> ```
> You can also pass the device explicitly: `./setup_models.sh --device CPU`

## Gradio UI

Access the web interface at http://localhost:7861
Expand Down Expand Up @@ -185,21 +191,21 @@ Configuration options:
| `BENCHMARK_INIT_DURATION` | 60 | Warmup time (seconds) |
| `BENCHMARK_MIN_REQUESTS` | 3 | Min requests before measuring |
| `BENCHMARK_REQUEST_TIMEOUT` | 300 | Request timeout (seconds) |
| `BENCHMARK_TARGET_DEVICE` | GPU | Target device: CPU, GPU, NPU |
| `TARGET_DEVICE` | GPU | Target device: CPU, GPU, NPU |
| `RESULTS_DIR` | results | Output directory |
| `REGISTRY` | false | Use registry images (true/false) |

Example:
```bash
make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 BENCHMARK_TARGET_DEVICE=GPU
make benchmark BENCHMARK_WORKERS=2 BENCHMARK_DURATION=600 TARGET_DEVICE=GPU
```

### Stream Density Test

Tests maximum concurrent validations within latency target.

```bash
make benchmark-density
make benchmark-stream-density
```

Output:
Expand Down
Loading
Loading