From 5fb9cbbc6b581f0473956ea595134f6299f0fe28 Mon Sep 17 00:00:00 2001 From: praveenkk123 Date: Tue, 16 Dec 2025 10:58:39 -0800 Subject: [PATCH] Potential fix for code scanning alert no. 58: Uncontrolled data used in path expression Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- .../st_video_rag_demo.py | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/Video-Description-Generation-Query-Retrieval/st_video_rag_demo.py b/Video-Description-Generation-Query-Retrieval/st_video_rag_demo.py index 24417e1..235fbdc 100644 --- a/Video-Description-Generation-Query-Retrieval/st_video_rag_demo.py +++ b/Video-Description-Generation-Query-Retrieval/st_video_rag_demo.py @@ -9,6 +9,10 @@ import streamlit as st from sentence_transformers import SentenceTransformer +# Define a safe dataset root directory +SAFE_DATASET_ROOT = os.path.abspath("./datasets") +os.makedirs(SAFE_DATASET_ROOT, exist_ok=True) + warnings.filterwarnings("ignore") logging.basicConfig(level=logging.INFO) @@ -149,7 +153,7 @@ # Dataset configuration st.subheader("📁 Dataset") - dataset_folder = st.text_input("Video Folder", ".") + dataset_folder = st.text_input(f"Video Folder (relative to {SAFE_DATASET_ROOT})", ".") max_videos = st.slider("Max Videos", 1, 128, 20) st.markdown("---") @@ -241,14 +245,24 @@ def generate_video_description_ollama(video_path, model, max_tokens=100, tempera def get_video_paths(folder, max_count): - """Get video file paths from folder.""" + """Get video file paths from folder. Validates that 'folder' is within SAFE_DATASET_ROOT.""" try: video_extensions = ['.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv'] video_files = [] - folder_path = os.path.abspath(folder) - - for root, dirs, files in os.walk(folder_path): + # Normalize and validate to ensure folder stays inside SAFE_DATASET_ROOT + user_folder = folder.strip() + # If the user gives an absolute path, remove its "/" + user_folder = os.path.relpath(user_folder, "/") if os.path.isabs(user_folder) else user_folder + safe_target_path = os.path.normpath(os.path.abspath(os.path.join(SAFE_DATASET_ROOT, user_folder))) + if not safe_target_path.startswith(SAFE_DATASET_ROOT): + logging.error(f"Attempted access to forbidden folder: {safe_target_path}") + return [] + if not os.path.isdir(safe_target_path): + logging.error(f"Folder does not exist: {safe_target_path}") + return [] + + for root, dirs, files in os.walk(safe_target_path): video_files.extend([ os.path.join(root, f) for f in files if any(f.lower().endswith(ext) for ext in video_extensions)