From 36700b2dcfb2da5c42b73212b0bfef6da875da73 Mon Sep 17 00:00:00 2001
From: Alex Qiu <alexander@browserbase.com>
Date: Thu, 12 Feb 2026 18:07:56 -0800
Subject: [PATCH 1/5] =?UTF-8?q?dynamic=20menu=20scraper=20template=20?=
 =?UTF-8?q?=E2=80=93=20WIP?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../menu-dynamic-extraction-demo/.env.example |  15 ++
 .../menu-dynamic-extraction-demo/.gitignore   |  34 +++
 python/menu-dynamic-extraction-demo/README.md | 112 ++++++++
 python/menu-dynamic-extraction-demo/config.py |  36 +++
 python/menu-dynamic-extraction-demo/main.py   | 147 ++++++++++
 python/menu-dynamic-extraction-demo/models.py |  98 +++++++
 .../pyproject.toml                            |  35 +++
 .../menu-dynamic-extraction-demo/scraper.py   | 255 ++++++++++++++++++
 python/menu-dynamic-extraction-demo/utils.py  | 117 ++++++++
 9 files changed, 849 insertions(+)
 create mode 100644 python/menu-dynamic-extraction-demo/.env.example
 create mode 100644 python/menu-dynamic-extraction-demo/.gitignore
 create mode 100644 python/menu-dynamic-extraction-demo/README.md
 create mode 100644 python/menu-dynamic-extraction-demo/config.py
 create mode 100644 python/menu-dynamic-extraction-demo/main.py
 create mode 100644 python/menu-dynamic-extraction-demo/models.py
 create mode 100644 python/menu-dynamic-extraction-demo/pyproject.toml
 create mode 100644 python/menu-dynamic-extraction-demo/scraper.py
 create mode 100644 python/menu-dynamic-extraction-demo/utils.py

diff --git a/python/menu-dynamic-extraction-demo/.env.example b/python/menu-dynamic-extraction-demo/.env.example
new file mode 100644
index 00000000..79ab44e8
--- /dev/null
+++ b/python/menu-dynamic-extraction-demo/.env.example
@@ -0,0 +1,15 @@
+# Browserbase credentials (required)
+# Get these from https://www.browserbase.com/settings
+BROWSERBASE_PROJECT_ID=your_browserbase_project_id
+BROWSERBASE_API_KEY=your_browserbase_api_key
+
+# Google API key for Gemini model (required for Stagehand)
+# Get your key from https://aistudio.google.com/apikey
+GOOGLE_API_KEY=your_google_api_key
+
+# Optional: Logging configuration
+# LOG_LEVEL=INFO
+
+# Optional: File paths
+# WEBSITES_FILE=websites.txt
+# OUTPUT_DIR=results
diff --git a/python/menu-dynamic-extraction-demo/.gitignore b/python/menu-dynamic-extraction-demo/.gitignore
new file mode 100644
index 00000000..a95c2797
--- /dev/null
+++ b/python/menu-dynamic-extraction-demo/.gitignore
@@ -0,0 +1,34 @@
+# Environment variables (CRITICAL - contains API keys)
+.env
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+venv/
+ENV/
+env/
+*.egg-info/
+dist/
+build/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+.DS_Store
+
+# Output directories
+results/
+output/
+*.json
+*.csv
+
+# Logs
+*.log
+
+# Jupyter
+.ipynb_checkpoints/
diff --git a/python/menu-dynamic-extraction-demo/README.md b/python/menu-dynamic-extraction-demo/README.md
new file mode 100644
index 00000000..28ade423
--- /dev/null
+++ b/python/menu-dynamic-extraction-demo/README.md
@@ -0,0 +1,112 @@
+# Stagehand + Browserbase: Restaurant Menu Extractor
+
+## AT A GLANCE
+
+- **Goal**: Automate restaurant menu extraction from websites using AI-powered browser automation to scrape menu items, prices, descriptions, and categories.
+- **Pattern Template**: Demonstrates web scraping with Stagehand's observe/act/extract pattern for navigating complex restaurant websites and parsing menu structures.
+- **Workflow**: Stagehand navigates to restaurant website, finds menu links using observe, extracts structured data with Pydantic schemas, handles multi-section menus (lunch/dinner/drinks), and outputs JSON results.
+- **Multi-Section Support**: Automatically detects menu subsections (Lunch, Dinner, Happy Hour, etc.) and extracts each separately for comprehensive coverage.
+- **Production-Ready**: Includes retry logic, popup handling, logging, error recovery, and a sequential batch mode (`python main.py --batch`) for processing multiple sites.
+- Docs → [Stagehand Act](https://docs.stagehand.dev/basics/act) | [Stagehand Observe](https://docs.stagehand.dev/basics/observe) | [Stagehand Extract](https://docs.stagehand.dev/basics/extract)
+
+## GLOSSARY
+
+- **observe**: Find and return interactive elements on the page matching a description without performing actions. Used here to locate menu links and subsections.
+  Docs → https://docs.stagehand.dev/basics/observe
+- **act**: Perform UI actions from natural language prompts (click buttons, navigate links). Used to click menu links discovered via observe.
+  Docs → https://docs.stagehand.dev/basics/act
+- **extract**: Pull structured data from web pages using natural language instructions and Pydantic schemas. Ensures menu data is consistently formatted.
+  Docs → https://docs.stagehand.dev/basics/extract
+- **Pydantic schemas**: Type-safe data models that define the structure of extracted menu data (sections, categories, items, prices).
+  Docs → https://docs.pydantic.dev/
+- **BYOB (Bring Your Own Browser)**: Run Stagehand sessions on Browserbase's cloud infrastructure for reliability, scalability, and live debugging.
+  Docs → https://docs.browserbase.com
+
+## QUICKSTART
+
+1. cd python/menu-dynamic-extraction-demo
+2. Install dependencies with uv:
+
+   ```bash
+   uv pip install -e .
+   ```
+
+   Alternatively, use pip:
+
+   ```bash
+   python -m venv venv
+   source venv/bin/activate  # On Windows: venv\Scripts\activate
+   pip install -e .
+   ```
+
+3. cp .env.example .env
+4. Add required API keys to .env:
+   - `BROWSERBASE_PROJECT_ID` - Get from https://www.browserbase.com/settings
+   - `BROWSERBASE_API_KEY` - Get from https://www.browserbase.com/settings
+   - `GOOGLE_API_KEY` - Get from https://aistudio.google.com/apikey
+5. Run the script:
+   ```bash
+   python main.py
+   ```
+   The script will prompt you for a restaurant website URL.
+
+## EXPECTED OUTPUT
+
+- Prompts for restaurant website URL input
+- Initializes Stagehand session with Browserbase (verbose logging shows browser actions)
+- Navigates to the restaurant website and attempts to close any popups/modals
+- Uses observe to find the menu link (retries up to 3 times if needed)
+- Clicks the menu link and navigates to menu page
+- Detects all menu subsections (Lunch, Dinner, Drinks, etc.) via observe
+- For each subsection:
+  - Navigates to that section
+  - Extracts structured menu data: sections → categories → items (name, description, price)
+- Extraction results from all sections are combined and saved to a JSON file (see `save_menu_to_json` in utils.py)
+- Session closes cleanly after extraction completes
+
+Example log output:
+```
+INFO: Navigating to https://example-restaurant.com ...
+INFO: Menu link found: ['https://example-restaurant.com/menu']
+INFO: Navigating to menu section: Lunch Menu ...
+INFO: Extracting menu section: Lunch Menu
+INFO: Navigating to menu section: Dinner Menu ...
+INFO: Session closed successfully
+```
+
+## COMMON PITFALLS
+
+- "ModuleNotFoundError: No module named 'stagehand'": Ensure you installed dependencies with `uv pip install -e .` or `pip install -e .`
+- Missing API keys: Verify .env contains BROWSERBASE_PROJECT_ID, BROWSERBASE_API_KEY, and GOOGLE_API_KEY
+- "Could not find menu link after multiple attempts": The restaurant website may have an unusual structure. Try manually checking if there's a clear "Menu" link. Increase MAX_RETRIES in config if needed.
+- Popup/modal blocking: The script attempts to close popups automatically, but some sites have persistent overlays. Check the Browserbase live view link to debug.
+- Empty extraction results: Some restaurant sites load menus dynamically or via iframes. The script skips iframe links automatically but may need manual adjustment for special cases.
+- Noisy logs: The default LOG_LEVEL is INFO. Set LOG_LEVEL=WARNING in .env for quieter output, or LOG_LEVEL=DEBUG for more detail while debugging.
+- Find more information on your Browserbase dashboard → https://www.browserbase.com/sign-in
+
+## USE CASES
+
+• **Restaurant data aggregation**: Build a database of restaurant menus across multiple locations for food delivery or review platforms.
+• **Menu price comparison**: Track menu prices over time to detect price changes or compare pricing across restaurant chains.
+• **Dietary restriction filtering**: Extract menu items and descriptions to identify vegan, gluten-free, or allergen-friendly options automatically.
+• **Recipe inspiration**: Collect menu descriptions to analyze trending ingredients, flavor combinations, or plating techniques.
+
+## LIMITATIONS
+• **PDF menu support**: Some restaurants use PDF menus. Enhance extraction to handle PDF downloads and OCR if needed.
+
+## NEXT STEPS
+
+• **Parallel batch processing**: `python main.py --batch` already reads URLs from `websites.txt` but processes them one at a time; run `process_restaurant` (see scraper.py) concurrently, one worker per URL, to speed up large batches.
+• **Output to database**: Extend the script to save extracted menus to PostgreSQL, MongoDB, or Airtable for persistent storage and querying.
+• **Restaurant info extraction**: Expand to extract contact details (phone, email, hours, address) in addition to menu data.
+• **Incremental updates**: Track previously extracted menus and only re-scrape when website content has changed (use checksums or last-modified headers).
+
+## HELPFUL RESOURCES
+
+📚 Stagehand Docs: https://docs.stagehand.dev/v3/first-steps/introduction
+📚 Python SDK: https://docs.stagehand.dev/v3/sdk/python
+🎮 Browserbase: https://www.browserbase.com
+💡 Try it out: https://www.browserbase.com/playground
+🔧 Templates: https://www.browserbase.com/templates
+📧 Need help? support@browserbase.com
+💬 Discord: http://stagehand.dev/discord
diff --git a/python/menu-dynamic-extraction-demo/config.py b/python/menu-dynamic-extraction-demo/config.py
new file mode 100644
index 00000000..819f0e83
--- /dev/null
+++ b/python/menu-dynamic-extraction-demo/config.py
@@ -0,0 +1,36 @@
+# Stagehand + Browserbase: Restaurant Menu Extractor - Configuration
+# See README.md for full documentation
+
+"""Configuration and environment variables for the restaurant scraper."""
+
+import os
+import logging
+from dotenv import load_dotenv
+from browserbase import Browserbase
+
+# Load environment variables from .env file
+load_dotenv()
+
+# API Keys (None when the variable is missing from the environment)
+MODEL_API_KEY = os.getenv("GOOGLE_API_KEY")  # Google API key for Gemini models
+BROWSERBASE_API_KEY = os.getenv("BROWSERBASE_API_KEY")
+BROWSERBASE_PROJECT_ID = os.getenv("BROWSERBASE_PROJECT_ID")
+
+# File paths (both can be overridden in .env, as documented in .env.example)
+WEBSITES_FILE = os.getenv("WEBSITES_FILE", "websites.txt")
+OUTPUT_DIR = os.getenv("OUTPUT_DIR", "results")
+
+# Scraper settings
+NO_MENU_LINK_FOUND = "NO_MENU_LINK_FOUND"
+MAX_RETRIES = 3
+
+# Logging (LOG_LEVEL is upper-cased so "info" and "INFO" are both accepted)
+LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
+logging.basicConfig(
+    level=LOG_LEVEL,
+    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+# Initialize Browserbase client
+bb = Browserbase(api_key=BROWSERBASE_API_KEY)
diff --git a/python/menu-dynamic-extraction-demo/main.py b/python/menu-dynamic-extraction-demo/main.py
new file mode 100644
index 00000000..f0dadb7a
--- /dev/null
+++ b/python/menu-dynamic-extraction-demo/main.py
@@ -0,0 +1,147 @@
+# Stagehand + Browserbase: Restaurant Menu Extractor
+# See README.md for full documentation
+
+"""
+Main entrypoint for restaurant menu extraction.
+
+This script uses Stagehand + Browserbase to automatically:
+1. Navigate to restaurant websites
+2. Find and click menu links
+3. Extract structured menu data (sections, categories, items)
+4. Save results to JSON files
+
+Usage:
+    python main.py              # Interactive mode - prompts for URL
+    python main.py --batch      # Batch mode - processes URLs from websites.txt
+"""
+
+from playwright.sync_api import sync_playwright
+from stagehand import Stagehand
+from config import (
+    BROWSERBASE_API_KEY,
+    BROWSERBASE_PROJECT_ID,
+    MODEL_API_KEY,
+    NO_MENU_LINK_FOUND,
+    bb,
+    logger
+)
+from models import MENU_SCHEMA
+from utils import normalize_url, get_website_from_user, load_websites_from_file, save_menu_to_json
+from scraper import close_popups, find_menu_link, extract_menu_from_sections, process_restaurant
+
+
+def main():
+    """Interactively extract one restaurant's menu: prompt for a URL, scrape it, save JSON."""
+    # Create Browserbase session
+    session = bb.sessions.create(project_id=BROWSERBASE_PROJECT_ID)
+    session_id = session.id
+
+    # Initialize Stagehand client
+    client = Stagehand(
+        browserbase_api_key=BROWSERBASE_API_KEY,
+        browserbase_project_id=BROWSERBASE_PROJECT_ID,
+        model_api_key=MODEL_API_KEY,
+    )
+
+    logger.info(f"Session started: {session_id}")
+    logger.info(f"Watch live: https://browserbase.com/sessions/{session_id}")
+
+    try:
+        # Attach Playwright to the remote Browserbase browser over CDP
+        with sync_playwright() as p:
+            browser = p.chromium.connect_over_cdp(
+                f"wss://connect.browserbase.com?apiKey={BROWSERBASE_API_KEY}&sessionId={session_id}"
+            )
+            ctx = browser.contexts[0]
+            page = ctx.pages[0] if ctx.pages else ctx.new_page()
+
+            # Prompt for the website URL and add a scheme if the user omitted one
+            website_url = normalize_url(get_website_from_user())
+            logger.info(f"Navigating to {website_url} ...")
+            page.goto(website_url, wait_until="domcontentloaded")
+
+            # Best-effort popup dismissal; the return value is intentionally ignored
+            close_popups(client, session_id)
+
+            # Locate menu link with retries
+            menu_link = find_menu_link(client, session_id)
+            if menu_link == NO_MENU_LINK_FOUND:
+                logger.error("Could not find menu link after multiple attempts.")
+            else:
+                logger.info(f"Menu link found: {menu_link}")
+
+                # Click through to the menu (if observe returned a list, use its first entry)
+                client.sessions.act(
+                    id=session_id,
+                    input=f"Click on: {menu_link[0] if isinstance(menu_link, list) else menu_link}",
+                    options={"model": {"modelName": "google/gemini-2.5-flash"}},
+                )
+
+                page.wait_for_load_state("load", timeout=20000)
+
+                # Find menu subsections (Lunch, Dinner, ...) on the menu page
+                sections_response = client.sessions.observe(
+                    id=session_id,
+                    instruction="Find all subsections on the current menu page, i.e. 'Lunch', 'Dinner', 'Happy Hour', etc. "
+                               "Return them as a list of links. If none found, return the current page link only in a list. "
+                               "Do not return duplicates if a link appears multiple times.",
+                )
+                sections = sections_response.data.result
+
+                # Extract menu from all sections
+                all_menu_sections = extract_menu_from_sections(client, session_id, page, sections)
+
+                # Save combined menu data to JSON file (skipped when nothing was extracted)
+                if all_menu_sections:
+                    save_menu_to_json(website_url, all_menu_sections)
+
+            browser.close()
+
+    finally:
+        # End the Stagehand session even if extraction raised
+        client.sessions.end(id=session_id)
+        logger.info("Session closed successfully")
+
+
+def batch_process():
+    """
+    Process multiple restaurant websites one after another.
+    URLs are loaded from WEBSITES_FILE (default: websites.txt).
+
+    Example usage:
+        Create websites.txt with one URL per line:
+        https://www.restaurant1.com
+        https://www.restaurant2.com
+        # This is a comment
+        https://www.restaurant3.com
+    """
+    websites = load_websites_from_file()
+    if not websites:
+        logger.error("No websites to process")
+        return
+
+    logger.info(f"Starting batch processing of {len(websites)} websites")
+
+    # Process restaurants sequentially; the 1-based index is passed as the agent id
+    results = []
+    for idx, url in enumerate(websites, start=1):
+        result = process_restaurant(url, agent_id=idx)
+        results.append(result)
+
+    # Summary: anything whose status is not "success" counts as failed
+    successful = sum(1 for r in results if r["status"] == "success")
+    failed = len(results) - successful
+    logger.info(f"\n{'='*60}")
+    logger.info("Batch processing complete!")
+    logger.info(f"Total: {len(results)} | Success: {successful} | Failed: {failed}")
+    logger.info(f"{'='*60}\n")
+
+
+if __name__ == "__main__":
+    import sys
+
+    # Simple CLI argument handling
+    if len(sys.argv) > 1 and sys.argv[1] == "--batch":
+        batch_process()
+    else:
+        main()
diff --git a/python/menu-dynamic-extraction-demo/models.py b/python/menu-dynamic-extraction-demo/models.py
new file mode 100644
index 00000000..3eb99f98
--- /dev/null
+++ b/python/menu-dynamic-extraction-demo/models.py
@@ -0,0 +1,98 @@
+# Stagehand + Browserbase: Restaurant Menu Extractor - Data Models
+# See README.md for full documentation
+
+"""Pydantic models and JSON schemas for menu extraction."""
+
+from typing import Optional, List
+from pydantic import BaseModel, Field
+
+
+class MenuItem(BaseModel):
+    name: str
+    description: Optional[str] = None
+    price: Optional[str] = None
+
+
+class MenuCategory(BaseModel):
+    """
+    A category within a section.
+    e.g., "Antipasti", "Pizza", "Pasta"
+    """
+    category_name: str
+    items: List[MenuItem]
+
+
+class MenuSection(BaseModel):
+    """
+    A full menu section, e.g., "Lunch", "Dinner", "Dessert".
+    Each section contains its own categories.
+    """
+    section_name: str
+    categories: List[MenuCategory]
+
+
+class Menu(BaseModel):
+    """
+    The full restaurant menu.
+    Compatible with restaurants with multiple menu pages or subsections.
+    """
+    sections: List[MenuSection]
+
+
+# Manual JSON schema for Gemini API compatibility (avoids Pydantic's $defs)
+MENU_SCHEMA = {
+    "type": "object",
+    "properties": {
+        "sections": {
+            "type": "array",
+            "description": "Menu sections (e.g., Lunch, Dinner, Dessert)",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "section_name": {
+                        "type": "string",
+                        "description": "Name of the menu section"
+                    },
+                    "categories": {
+                        "type": "array",
+                        "description": "Categories within this section",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "category_name": {
+                                    "type": "string",
+                                    "description": "Name of the category (e.g., Appetizers, Entrees)"
+                                },
+                                "items": {
+                                    "type": "array",
+                                    "description": "Menu items in this category",
+                                    "items": {
+                                        "type": "object",
+                                        "properties": {
+                                            "name": {
+                                                "type": "string",
+                                                "description": "Item name"
+                                            },
+                                            "description": {
+                                                "type": "string",
+                                                "description": "Item description"
+                                            },
+                                            "price": {
+                                                "type": "string",
+                                                "description": "Item price"
+                                            }
+                                        },
+                                        "required": ["name"]
+                                    }
+                                }
+                            },
+                            "required": ["category_name", "items"]
+                        }
+                    }
+                },
+                "required": ["section_name", "categories"]
+            }
+        }
+    },
+    "required": ["sections"]
+}
diff --git a/python/menu-dynamic-extraction-demo/pyproject.toml b/python/menu-dynamic-extraction-demo/pyproject.toml
new file mode 100644
index 00000000..0e0bb560
--- /dev/null
+++ b/python/menu-dynamic-extraction-demo/pyproject.toml
@@ -0,0 +1,40 @@
+[project]
+name = "restaurant-demo"
+version = "0.1.0"
+description = "Restaurant menu extraction using Stagehand and Browserbase"
+readme = "README.md"
+requires-python = ">=3.9"
+dependencies = [
+    "browserbase>=1.4.0",
+    "playwright>=1.40.0",
+    "pydantic>=2.0.0",
+    "python-dotenv>=1.2.1",
+    "stagehand>=3.0.0",  # v3 API - pure API client
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+    "black>=23.0.0",
+    "ruff>=0.1.0",
+]
+
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools]
+# Flat layout with several top-level modules: list them explicitly, otherwise
+# setuptools' automatic discovery aborts `pip install -e .`.
+py-modules = ["config", "main", "models", "scraper", "utils"]
+
+[tool.black]
+line-length = 100
+target-version = ['py39', 'py310', 'py311']
+
+[tool.ruff]
+line-length = 100
+target-version = "py39"
+
+[tool.ruff.lint]
+select = ["E", "F", "I", "N", "W"]
diff --git a/python/menu-dynamic-extraction-demo/scraper.py b/python/menu-dynamic-extraction-demo/scraper.py
new file mode 100644
index 00000000..4eaad1de
--- /dev/null
+++ b/python/menu-dynamic-extraction-demo/scraper.py
@@ -0,0 +1,255 @@
+# Stagehand + Browserbase: Restaurant Menu Extractor - Scraping Logic
+# See README.md for full documentation
+
+"""Core scraping logic for restaurant menu extraction."""
+
+import time
+import logging
+from typing import List, Dict, Any, Optional
+from datetime import datetime
+from playwright.sync_api import sync_playwright, Page
+from stagehand import Stagehand
+from config import (
+    BROWSERBASE_API_KEY,
+    BROWSERBASE_PROJECT_ID,
+    MODEL_API_KEY,
+    NO_MENU_LINK_FOUND,
+    MAX_RETRIES,
+    bb,
+    logger
+)
+from models import MENU_SCHEMA
+from utils import save_menu_to_json
+
+
+def close_popups(client: Stagehand, session_id: str, log: logging.Logger = logger) -> bool:
+    """
+    Ask Stagehand to dismiss popups/modals that might be blocking the page (best effort).
+
+    Args:
+        client: Stagehand client instance
+        session_id: Active session ID
+        log: Logger instance
+
+    Returns:
+        True if the act call completed without raising, False if it raised
+    """
+    try:
+        client.sessions.act(
+            id=session_id,
+            input="Close any popups, modals, or cookie notices that are blocking the page",
+            options={"model": {"modelName": "google/gemini-2.5-flash"}},
+        )
+        log.info("Successfully closed popups/modals")
+        return True
+    except Exception as e:
+        log.debug(f"No popups to close or failed to close: {e}")
+        return False
+
+
+def find_menu_link(client: Stagehand, session_id: str, max_retries: int = MAX_RETRIES):
+    """
+    Attempt to locate the restaurant's menu link using Stagehand observe.
+    Retries up to max_retries times if the observe call raises.
+
+    Args:
+        client: Stagehand client instance
+        session_id: Active session ID
+        max_retries: Maximum number of attempts
+
+    Returns:
+        Non-empty observe result, or NO_MENU_LINK_FOUND if nothing was found
+    """
+    instruction = (
+        "Find the most likely link to the restaurant's menu on this webpage. If the webpage "
+        "already is the menu page, return the current page URL. Return only the link URL."
+    )
+
+    for attempt in range(1, max_retries + 1):
+        try:
+            response = client.sessions.observe(
+                id=session_id,
+                instruction=instruction,
+                options={"model": {"modelName": "google/gemini-2.5-flash"}},
+            )
+            return response.data.result or NO_MENU_LINK_FOUND  # callers index result[0]
+        except Exception as e:
+            logger.warning(f"[Attempt {attempt}] Failed: {e}")
+            time.sleep(1)
+    return NO_MENU_LINK_FOUND
+
+
+def extract_menu_from_sections(
+    client: Stagehand,
+    session_id: str,
+    page: Page,
+    sections: List[Any]
+) -> List[Dict[str, Any]]:
+    """
+    Visit each menu section via Stagehand act and extract its structured menu data.
+
+    Args:
+        client: Stagehand client instance
+        session_id: Active session ID
+        page: Playwright page instance (used only to wait for page loads)
+        sections: Section entries: dicts with a "description" key, or any str()-able value
+
+    Returns:
+        Flat list combining the "sections" entries extracted from every visited page
+    """
+    all_menu_sections = []
+
+    for section in sections:
+        section_desc = section.get("description", "") if isinstance(section, dict) else str(section)
+        logger.info(f"Navigating to menu section: {section_desc} ...")
+
+        # Skip entries whose description mentions an iframe
+        if "iframe" in section_desc.lower():
+            logger.info("Skipping iframe link ...")
+            continue
+
+        # Navigate to section (the description doubles as the natural-language target)
+        client.sessions.act(
+            id=session_id,
+            input=f"Navigate to: {section_desc}",
+            options={"model": {"modelName": "google/gemini-2.5-flash"}},
+        )
+
+        page.wait_for_load_state("load", timeout=20000)
+
+        # Extract menu data from the current page, constrained by MENU_SCHEMA
+        extract_response = client.sessions.extract(
+            id=session_id,
+            instruction="Extract the menu organized by sections and categories. "
+                       "Each section contains categories, and each category contains menu items. "
+                       "For each item, extract the name, description, and price. "
+                       "Preserve price formatting exactly as written.",
+            schema=MENU_SCHEMA,
+            options={"model": {"modelName": "google/gemini-2.5-flash"}},
+        )
+        logger.info(f"Menu data extracted for {section_desc}")
+
+        # Keep only results that carry a "sections" key; anything else is silently dropped
+        menu_data = extract_response.data.result
+        if menu_data and "sections" in menu_data:
+            all_menu_sections.extend(menu_data["sections"])
+
+    return all_menu_sections
+
+
+def process_restaurant(website_url: str, agent_id: int) -> Dict[str, Any]:
+    """
+    Web agent that processes a single restaurant website.
+    This represents a single subprocessor in a production pipeline.
+
+    Args:
+        website_url: The restaurant website to scrape
+        agent_id: Unique identifier for this agent instance
+
+    Returns:
+        Result dict: "status" is "success" or "error"; "menu_data" holds the extracted sections
+    """
+    agent_logger = logging.getLogger(f"Agent-{agent_id}")
+    start_time = datetime.now()
+
+    result = {
+        "agent_id": agent_id,
+        "url": website_url,
+        "status": "pending",
+        "start_time": start_time.isoformat(),
+        "menu_data": [],
+        "error": None,
+    }
+
+    # Create Browserbase session
+    session = bb.sessions.create(project_id=BROWSERBASE_PROJECT_ID)
+    session_id = session.id
+
+    # Initialize Stagehand client
+    client = Stagehand(
+        browserbase_api_key=BROWSERBASE_API_KEY,
+        browserbase_project_id=BROWSERBASE_PROJECT_ID,
+        model_api_key=MODEL_API_KEY,
+    )
+
+    agent_logger.info(f"Session started: {session_id}")
+    agent_logger.info(f"Watch live: https://browserbase.com/sessions/{session_id}")
+
+    try:
+        # Connect Playwright to Browserbase
+        with sync_playwright() as p:
+            browser = p.chromium.connect_over_cdp(
+                f"wss://connect.browserbase.com?apiKey={BROWSERBASE_API_KEY}&sessionId={session_id}"
+            )
+            ctx = browser.contexts[0]
+            page = ctx.pages[0] if ctx.pages else ctx.new_page()
+
+            # Navigate to website
+            agent_logger.info(f"Navigating to {website_url}")
+            page.goto(website_url, wait_until="domcontentloaded")
+
+            # Close any popups on initial page load
+            close_popups(client, session_id, agent_logger)
+
+            # Extract menu data
+            menu_link = find_menu_link(client, session_id)
+            if menu_link == NO_MENU_LINK_FOUND:
+                result["error"] = "Could not find menu link"
+                agent_logger.warning(result["error"])
+            else:
+                agent_logger.info(f"Menu link: {menu_link}")
+
+                # Navigate to menu link
+                client.sessions.act(
+                    id=session_id,
+                    input=f"Click on: {menu_link[0] if isinstance(menu_link, list) else menu_link}",
+                    options={"model": {"modelName": "google/gemini-2.5-flash"}},
+                )
+                page.wait_for_load_state("load", timeout=20000)
+
+                # Close any popups after navigating to menu page
+                close_popups(client, session_id, agent_logger)
+
+                # Extract menu sections
+                sections_response = client.sessions.observe(
+                    id=session_id,
+                    instruction="Find all subsections on the current menu page, i.e. 'Lunch', 'Dinner', 'Happy Hour', etc. "
+                               "Return them as a list of links. If none found, return the current page link only in a list. "
+                               "Do not return duplicates if a link appears multiple times.",
+                )
+                sections = sections_response.data.result
+
+                # Store on result so the data is defined (and returned) on every code path
+                result["menu_data"] = extract_menu_from_sections(client, session_id, page, sections)
+
+            browser.close()
+
+        result["status"] = "error" if result["error"] else "success"  # no menu link => error
+        end_time = datetime.now()
+        result["end_time"] = end_time.isoformat()
+        result["duration_seconds"] = (end_time - start_time).total_seconds()
+        agent_logger.info(f"Completed extraction in {result['duration_seconds']:.2f}s")
+
+        # Save combined menu data to JSON file
+        if result["menu_data"]:
+            save_menu_to_json(
+                website_url,
+                result["menu_data"],
+                agent_id=agent_id,
+                duration_seconds=result["duration_seconds"]
+            )
+
+    except Exception as e:
+        result["status"] = "error"
+        result["error"] = str(e)
+        agent_logger.error(f"Error processing {website_url}: {e}", exc_info=True)
+
+    finally:
+        # End session
+        try:
+            client.sessions.end(id=session_id)
+            agent_logger.info("Session closed successfully")
+        except Exception as e:
+            agent_logger.error(f"Error closing session: {e}")
+
+    return result
diff --git a/python/menu-dynamic-extraction-demo/utils.py b/python/menu-dynamic-extraction-demo/utils.py
new file mode 100644
index 00000000..23171436
--- /dev/null
+++ b/python/menu-dynamic-extraction-demo/utils.py
@@ -0,0 +1,117 @@
+# Stagehand + Browserbase: Restaurant Menu Extractor - Utilities
+# See README.md for full documentation
+
+"""Utility functions for the restaurant scraper."""
+
+import json
+import time
+import re
+from typing import List, Dict, Any
+from pathlib import Path
+from datetime import datetime
+from urllib.parse import urlparse
+from config import WEBSITES_FILE, OUTPUT_DIR, logger
+
+
+def normalize_url(url: str) -> str:
+    """
+    Strip whitespace and prepend https:// when the URL has no http(s) scheme.
+
+    Args:
+        url: The URL to normalize; the scheme is matched case-insensitively
+
+    Returns:
+        The stripped URL, unchanged if it already starts with http:// or https://
+    """
+    url = url.strip()
+    if not url.lower().startswith(("http://", "https://")):
+        url = "https://" + url
+    return url
+
+
+def load_websites_from_file(file_path: str = WEBSITES_FILE) -> List[str]:
+    """
+    Load website URLs from a UTF-8 text file, one URL per line.
+    Blank lines and lines starting with # are ignored.
+
+    Args:
+        file_path: Path to the file containing URLs
+
+    Returns:
+        List of normalized URLs, or an empty list if the file does not exist
+    """
+    websites = []
+    try:
+        with open(file_path, 'r', encoding='utf-8-sig') as f:  # -sig drops a BOM
+            for line in f:
+                line = line.strip()
+                if line and not line.startswith('#'):
+                    websites.append(normalize_url(line))
+        logger.info(f"Loaded {len(websites)} websites from {file_path}")
+        return websites
+    except FileNotFoundError:
+        logger.error(f"File not found: {file_path}")
+        return []
+
+
+def get_website_from_user() -> str:
+    """
+    Ask on standard input for a restaurant website URL.
+
+    Returns the entered text with surrounding whitespace removed.
+    """
+    entered = input("Enter restaurant website URL: ")
+    return entered.strip()
+
+
+def save_menu_to_json(
+    website_url: str,
+    all_menu_sections: List[Dict[str, Any]],
+    agent_id: int = None,
+    duration_seconds: float = None
+) -> str:
+    """
+    Write the combined menu data to an indented UTF-8 JSON file in OUTPUT_DIR.
+
+    Args:
+        website_url: The restaurant website URL (its host names the file)
+        all_menu_sections: Combined list of all menu sections
+        agent_id: Optional agent ID for batch processing
+        duration_seconds: Optional duration of extraction
+
+    Returns:
+        Path to the saved JSON file
+    """
+    # Create the output directory (and any missing parents) if needed
+    Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
+
+    # Generate safe filename from URL and timestamp
+    parsed_url = urlparse(website_url)
+    safe_name = re.sub(r'[^\w\-]', '_', parsed_url.netloc or parsed_url.path)
+    timestamp = int(time.time())
+    filename = f"{OUTPUT_DIR}/{safe_name}_{timestamp}.json"
+
+    # Create combined output
+    output_data = {
+        "restaurant_url": website_url,
+        "extracted_at": timestamp,
+        "extracted_at_readable": datetime.fromtimestamp(timestamp).isoformat(),
+        "menu": {
+            "sections": all_menu_sections
+        }
+    }
+
+    # Add optional fields only when the caller supplied them
+    if agent_id is not None:
+        output_data["agent_id"] = agent_id
+    if duration_seconds is not None:
+        output_data["duration_seconds"] = duration_seconds
+
+    # ensure_ascii=False keeps accented dish names readable in the file
+    with open(filename, 'w', encoding='utf-8') as f:
+        json.dump(output_data, f, indent=2, ensure_ascii=False)
+
+    logger.info(f"✓ Menu saved to: {filename}")
+    logger.info(f"✓ Total sections extracted: {len(all_menu_sections)}")
+
+    return filename

From e8f6cdb5e71a5d8b04c0cd1ac0f9efcbcc435fac Mon Sep 17 00:00:00 2001
From: Alex Qiu <alexander@browserbase.com>
Date: Tue, 17 Feb 2026 09:47:10 -0800
Subject: [PATCH 2/5] small fixes

---
 .../menu-dynamic-extraction-demo/.env.example |  9 +-------
 python/menu-dynamic-extraction-demo/README.md | 21 +++++++++++++------
 python/menu-dynamic-extraction-demo/config.py | 11 +++++++++-
 python/menu-dynamic-extraction-demo/main.py   |  7 ++++---
 .../pyproject.toml                            |  2 +-
 .../menu-dynamic-extraction-demo/scraper.py   |  1 +
 .../websites.txt.example                      | 15 +++++++++++++
 7 files changed, 47 insertions(+), 19 deletions(-)
 create mode 100644 python/menu-dynamic-extraction-demo/websites.txt.example

diff --git a/python/menu-dynamic-extraction-demo/.env.example b/python/menu-dynamic-extraction-demo/.env.example
index 79ab44e8..9afbafcc 100644
--- a/python/menu-dynamic-extraction-demo/.env.example
+++ b/python/menu-dynamic-extraction-demo/.env.example
@@ -3,13 +3,6 @@
 BROWSERBASE_PROJECT_ID=your_browserbase_project_id
 BROWSERBASE_API_KEY=your_browserbase_api_key
 
-# Google API key for Gemini model (required for Stagehand)
+# Google API key (required for Stagehand with Gemini models)
 # Get your key from https://aistudio.google.com/apikey
 GOOGLE_API_KEY=your_google_api_key
-
-# Optional: Logging configuration
-# LOG_LEVEL=INFO
-
-# Optional: File paths
-# WEBSITES_FILE=websites.txt
-# OUTPUT_DIR=results
diff --git a/python/menu-dynamic-extraction-demo/README.md b/python/menu-dynamic-extraction-demo/README.md
index 28ade423..6f2d7e28 100644
--- a/python/menu-dynamic-extraction-demo/README.md
+++ b/python/menu-dynamic-extraction-demo/README.md
@@ -4,6 +4,7 @@
 
 - **Goal**: Automate restaurant menu extraction from websites using AI-powered browser automation to scrape menu items, prices, descriptions, and categories.
 - **Pattern Template**: Demonstrates web scraping with Stagehand's observe/act/extract pattern for navigating complex restaurant websites and parsing menu structures.
+- **One script, many websites**: Stagehand can adapt to different webpage layouts with the same core script thanks to its LLM-powered primitives.
 - **Workflow**: Stagehand navigates to restaurant website, finds menu links using observe, extracts structured data with Pydantic schemas, handles multi-section menus (lunch/dinner/drinks), and outputs JSON results.
 - **Multi-Section Support**: Automatically detects menu subsections (Lunch, Dinner, Happy Hour, etc.) and extracts each separately for comprehensive coverage.
 - **Production-Ready**: Includes retry logic, popup handling, logging, error recovery, and parallel processing capabilities for batch extraction.
@@ -24,14 +25,14 @@
 
 ## QUICKSTART
 
-1. cd python/restaurant-demo
+1. cd menu-dynamic-extraction-demo
 2. Install dependencies with uv:
 
    ```bash
    uv pip install -e .
    ```
 
-   Alternatively, use pip:
+   Alternatively, use pip/pip3:
 
    ```bash
    python -m venv venv
@@ -49,6 +50,13 @@
    python main.py
    ```
    The script will prompt you for a restaurant website URL.
+   Some of our favorites here in SF include https://www.thetailorssonsf.com/, https://www.thegrovesf.com/, and https://www.nopalitosf.com/.
+
+   For batch processing multiple restaurants:
+   ```bash
+   python main.py --batch
+   ```
+   Create a `websites.txt` file with one URL per line (see `websites.txt.example`).
 
 ## EXPECTED OUTPUT
 
@@ -61,7 +69,7 @@
 - For each subsection:
   - Navigates to that section
   - Extracts structured menu data: sections → categories → items (name, description, price)
-- All extraction results are stored in the Stagehand session (can be extended to write JSON files)
+- Saves all extraction results to timestamped JSON files in the `results/` directory
 - Session closes cleanly after extraction completes
 
 Example log output:
@@ -78,10 +86,10 @@ INFO: Session closed successfully
 
 - "ModuleNotFoundError: No module named 'stagehand'": Ensure you installed dependencies with `uv pip install -e .` or `pip install -e .`
 - Missing API keys: Verify .env contains BROWSERBASE_PROJECT_ID, BROWSERBASE_API_KEY, and GOOGLE_API_KEY
-- "Could not find menu link after multiple attempts": The restaurant website may have an unusual structure. Try manually checking if there's a clear "Menu" link. Increase MAX_RETRIES in config if needed.
+- "Could not find menu link after multiple attempts": The restaurant website may have an unusual structure. Try manually checking if there's a clear "Menu" link. Increase MAX_RETRIES in config.py if needed.
 - Popup/modal blocking: The script attempts to close popups automatically, but some sites have persistent overlays. Check the Browserbase live view link to debug.
 - Empty extraction results: Some restaurant sites load menus dynamically or via iframes. The script skips iframe links automatically but may need manual adjustment for special cases.
-- Stagehand verbose=2 logging: Produces detailed output for debugging. Set LOG_LEVEL=WARNING in .env for quieter output.
+- Detailed logging: The script logs INFO level by default. Set LOG_LEVEL=WARNING in .env for quieter output, or LOG_LEVEL=DEBUG for more verbose logging.
 - Find more information on your Browserbase dashboard → https://www.browserbase.com/sign-in
 
 ## USE CASES
@@ -96,10 +104,11 @@ INFO: Session closed successfully
 
 ## NEXT STEPS
 
-• **Batch processing**: Modify to accept a list of restaurant URLs from a file and process them in parallel using asyncio workers (see scraper.py for agent pattern).
+• **Parallel batch processing**: Enhance batch processing to use asyncio workers for concurrent extraction across multiple restaurants (currently processes sequentially).
 • **Output to database**: Extend the script to save extracted menus to PostgreSQL, MongoDB, or Airtable for persistent storage and querying.
 • **Restaurant info extraction**: Expand to extract contact details (phone, email, hours, address) in addition to menu data.
 • **Incremental updates**: Track previously extracted menus and only re-scrape when website content has changed (use checksums or last-modified headers).
+• **PDF menu support**: Add support for restaurants that use PDF menus instead of web pages.
 
 ## HELPFUL RESOURCES
 
diff --git a/python/menu-dynamic-extraction-demo/config.py b/python/menu-dynamic-extraction-demo/config.py
index 819f0e83..cef7588d 100644
--- a/python/menu-dynamic-extraction-demo/config.py
+++ b/python/menu-dynamic-extraction-demo/config.py
@@ -12,10 +12,19 @@
 load_dotenv()
 
 # API Keys
-MODEL_API_KEY = os.getenv("GOOGLE_API_KEY")  # Google API key for Gemini models
+GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")  # Google API key for Gemini models
+MODEL_API_KEY = GOOGLE_API_KEY  # Alias for compatibility
 BROWSERBASE_API_KEY = os.getenv("BROWSERBASE_API_KEY")
 BROWSERBASE_PROJECT_ID = os.getenv("BROWSERBASE_PROJECT_ID")
 
+# Validate required environment variables
+if not GOOGLE_API_KEY:
+    raise ValueError("GOOGLE_API_KEY environment variable is required. Get one at https://aistudio.google.com/apikey")
+if not BROWSERBASE_API_KEY:
+    raise ValueError("BROWSERBASE_API_KEY environment variable is required. Get one at https://www.browserbase.com/settings")
+if not BROWSERBASE_PROJECT_ID:
+    raise ValueError("BROWSERBASE_PROJECT_ID environment variable is required. Get one at https://www.browserbase.com/settings")
+
 # File paths
 WEBSITES_FILE = os.getenv("WEBSITES_FILE", "websites.txt")
 OUTPUT_DIR = "results"
diff --git a/python/menu-dynamic-extraction-demo/main.py b/python/menu-dynamic-extraction-demo/main.py
index f0dadb7a..ebab1148 100644
--- a/python/menu-dynamic-extraction-demo/main.py
+++ b/python/menu-dynamic-extraction-demo/main.py
@@ -64,6 +64,7 @@ def main():
             close_popups(client, session_id)
 
             # Locate menu link with retries
+            all_menu_sections = []
             menu_link = find_menu_link(client, session_id)
             if menu_link == NO_MENU_LINK_FOUND:
                 logger.error("Could not find menu link after multiple attempts.")
@@ -91,9 +92,9 @@ def main():
                 # Extract menu from all sections
                 all_menu_sections = extract_menu_from_sections(client, session_id, page, sections)
 
-                # Save combined menu data to JSON file
-                if all_menu_sections:
-                    save_menu_to_json(website_url, all_menu_sections)
+            # Save combined menu data to JSON file
+            if all_menu_sections:
+                save_menu_to_json(website_url, all_menu_sections)
 
             browser.close()
 
diff --git a/python/menu-dynamic-extraction-demo/pyproject.toml b/python/menu-dynamic-extraction-demo/pyproject.toml
index 0e0bb560..262deef4 100644
--- a/python/menu-dynamic-extraction-demo/pyproject.toml
+++ b/python/menu-dynamic-extraction-demo/pyproject.toml
@@ -1,5 +1,5 @@
 [project]
-name = "restaurant-demo"
+name = "menu-dynamic-extraction-demo"
 version = "0.1.0"
 description = "Restaurant menu extraction using Stagehand and Browserbase"
 readme = "README.md"
diff --git a/python/menu-dynamic-extraction-demo/scraper.py b/python/menu-dynamic-extraction-demo/scraper.py
index 4eaad1de..9e3cc7cb 100644
--- a/python/menu-dynamic-extraction-demo/scraper.py
+++ b/python/menu-dynamic-extraction-demo/scraper.py
@@ -192,6 +192,7 @@ def process_restaurant(website_url: str, agent_id: int) -> Dict[str, Any]:
             close_popups(client, session_id, agent_logger)
 
             # Extract menu data
+            all_menu_sections = []
             menu_link = find_menu_link(client, session_id)
             if menu_link == NO_MENU_LINK_FOUND:
                 agent_logger.warning("Could not find menu link")
diff --git a/python/menu-dynamic-extraction-demo/websites.txt.example b/python/menu-dynamic-extraction-demo/websites.txt.example
new file mode 100644
index 00000000..16953ad8
--- /dev/null
+++ b/python/menu-dynamic-extraction-demo/websites.txt.example
@@ -0,0 +1,15 @@
+# Restaurant Menu Extraction - Batch Processing URLs
+#
+# Instructions:
+# 1. Copy this file to websites.txt
+# 2. Add one restaurant URL per line
+# 3. Lines starting with # are treated as comments and ignored
+# 4. Run: python main.py --batch
+#
+# Example URLs:
+
+https://www.thetailorssonsf.com/
+https://www.thegrovesf.com/
+https://www.nopalitosf.com/
+
+# Add more restaurant URLs below:

From 472f395a8b4cc302b6032a90d02ac18ee57ade90 Mon Sep 17 00:00:00 2001
From: Alex Qiu <alexander@browserbase.com>
Date: Tue, 17 Feb 2026 09:50:26 -0800
Subject: [PATCH 3/5] change env var to MODEL_API_KEY everywhere

---
 python/menu-dynamic-extraction-demo/.env.example | 6 +++---
 python/menu-dynamic-extraction-demo/README.md    | 4 ++--
 python/menu-dynamic-extraction-demo/config.py    | 7 +++----
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/python/menu-dynamic-extraction-demo/.env.example b/python/menu-dynamic-extraction-demo/.env.example
index 9afbafcc..8384030d 100644
--- a/python/menu-dynamic-extraction-demo/.env.example
+++ b/python/menu-dynamic-extraction-demo/.env.example
@@ -3,6 +3,6 @@
 BROWSERBASE_PROJECT_ID=your_browserbase_project_id
 BROWSERBASE_API_KEY=your_browserbase_api_key
 
-# Google API key (required for Stagehand with Gemini models)
-# Get your key from https://aistudio.google.com/apikey
-GOOGLE_API_KEY=your_google_api_key
+# Model API key (required for Stagehand)
+# For Google Gemini models, get your key from https://aistudio.google.com/apikey
+MODEL_API_KEY=your_model_api_key
diff --git a/python/menu-dynamic-extraction-demo/README.md b/python/menu-dynamic-extraction-demo/README.md
index 6f2d7e28..09863317 100644
--- a/python/menu-dynamic-extraction-demo/README.md
+++ b/python/menu-dynamic-extraction-demo/README.md
@@ -44,7 +44,7 @@
 4. Add required API keys to .env:
    - `BROWSERBASE_PROJECT_ID` - Get from https://www.browserbase.com/settings
    - `BROWSERBASE_API_KEY` - Get from https://www.browserbase.com/settings
-   - `GOOGLE_API_KEY` - Get from https://aistudio.google.com/apikey
+   - `MODEL_API_KEY` - Get from https://aistudio.google.com/apikey (for Google Gemini)
 5. Run the script:
    ```bash
    python main.py
@@ -85,7 +85,7 @@ INFO: Session closed successfully
 ## COMMON PITFALLS
 
 - "ModuleNotFoundError: No module named 'stagehand'": Ensure you installed dependencies with `uv pip install -e .` or `pip install -e .`
-- Missing API keys: Verify .env contains BROWSERBASE_PROJECT_ID, BROWSERBASE_API_KEY, and GOOGLE_API_KEY
+- Missing API keys: Verify .env contains BROWSERBASE_PROJECT_ID, BROWSERBASE_API_KEY, and MODEL_API_KEY
 - "Could not find menu link after multiple attempts": The restaurant website may have an unusual structure. Try manually checking if there's a clear "Menu" link. Increase MAX_RETRIES in config.py if needed.
 - Popup/modal blocking: The script attempts to close popups automatically, but some sites have persistent overlays. Check the Browserbase live view link to debug.
 - Empty extraction results: Some restaurant sites load menus dynamically or via iframes. The script skips iframe links automatically but may need manual adjustment for special cases.
diff --git a/python/menu-dynamic-extraction-demo/config.py b/python/menu-dynamic-extraction-demo/config.py
index cef7588d..6b624ac2 100644
--- a/python/menu-dynamic-extraction-demo/config.py
+++ b/python/menu-dynamic-extraction-demo/config.py
@@ -12,14 +12,13 @@
 load_dotenv()
 
 # API Keys
-GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")  # Google API key for Gemini models
-MODEL_API_KEY = GOOGLE_API_KEY  # Alias for compatibility
+MODEL_API_KEY = os.getenv("MODEL_API_KEY")  # API key for LLM provider (e.g., Google Gemini)
 BROWSERBASE_API_KEY = os.getenv("BROWSERBASE_API_KEY")
 BROWSERBASE_PROJECT_ID = os.getenv("BROWSERBASE_PROJECT_ID")
 
 # Validate required environment variables
-if not GOOGLE_API_KEY:
-    raise ValueError("GOOGLE_API_KEY environment variable is required. Get one at https://aistudio.google.com/apikey")
+if not MODEL_API_KEY:
+    raise ValueError("MODEL_API_KEY environment variable is required. For Google Gemini, get one at https://aistudio.google.com/apikey")
 if not BROWSERBASE_API_KEY:
     raise ValueError("BROWSERBASE_API_KEY environment variable is required. Get one at https://www.browserbase.com/settings")
 if not BROWSERBASE_PROJECT_ID:

From 751bd4affa57e34e02104ab08d6707a9bd89f605 Mon Sep 17 00:00:00 2001
From: Alex Qiu <alexander@browserbase.com>
Date: Tue, 17 Feb 2026 09:53:46 -0800
Subject: [PATCH 4/5] stagehand use default model config, not gemini
 specifically

---
 python/menu-dynamic-extraction-demo/main.py    | 1 -
 python/menu-dynamic-extraction-demo/scraper.py | 5 -----
 2 files changed, 6 deletions(-)

diff --git a/python/menu-dynamic-extraction-demo/main.py b/python/menu-dynamic-extraction-demo/main.py
index ebab1148..006fed53 100644
--- a/python/menu-dynamic-extraction-demo/main.py
+++ b/python/menu-dynamic-extraction-demo/main.py
@@ -75,7 +75,6 @@ def main():
                 client.sessions.act(
                     id=session_id,
                     input=f"Click on: {menu_link[0] if isinstance(menu_link, list) else menu_link}",
-                    options={"model": {"modelName": "google/gemini-2.5-flash"}},
                 )
 
                 page.wait_for_load_state("load", timeout=20000)
diff --git a/python/menu-dynamic-extraction-demo/scraper.py b/python/menu-dynamic-extraction-demo/scraper.py
index 9e3cc7cb..e3bf9e57 100644
--- a/python/menu-dynamic-extraction-demo/scraper.py
+++ b/python/menu-dynamic-extraction-demo/scraper.py
@@ -38,7 +38,6 @@ def close_popups(client: Stagehand, session_id: str, log: logging.Logger = logge
         client.sessions.act(
             id=session_id,
             input="Close any popups, modals, or cookie notices that are blocking the page",
-            options={"model": {"modelName": "google/gemini-2.5-flash"}},
         )
         log.info("Successfully closed popups/modals")
         return True
@@ -70,7 +69,6 @@ def find_menu_link(client: Stagehand, session_id: str, max_retries: int = MAX_RE
             response = client.sessions.observe(
                 id=session_id,
                 instruction=instruction,
-                options={"model": {"modelName": "google/gemini-2.5-flash"}},
             )
             return response.data.result
         except Exception as e:
@@ -112,7 +110,6 @@ def extract_menu_from_sections(
         client.sessions.act(
             id=session_id,
             input=f"Navigate to: {section_desc}",
-            options={"model": {"modelName": "google/gemini-2.5-flash"}},
         )
 
         page.wait_for_load_state("load", timeout=20000)
@@ -125,7 +122,6 @@ def extract_menu_from_sections(
                        "For each item, extract the name, description, and price. "
                        "Preserve price formatting exactly as written.",
             schema=MENU_SCHEMA,
-            options={"model": {"modelName": "google/gemini-2.5-flash"}},
         )
         logger.info(f"Menu data extracted for {section_desc}")
 
@@ -203,7 +199,6 @@ def process_restaurant(website_url: str, agent_id: int) -> Dict[str, Any]:
                 client.sessions.act(
                     id=session_id,
                     input=f"Click on: {menu_link[0] if isinstance(menu_link, list) else menu_link}",
-                    options={"model": {"modelName": "google/gemini-2.5-flash"}},
                 )
 
                 page.wait_for_load_state("load", timeout=20000)

From ec1d6102f79651c804386e85b9d56bf91d958abb Mon Sep 17 00:00:00 2001
From: Alex Qiu <alexander@browserbase.com>
Date: Tue, 17 Feb 2026 10:41:20 -0800
Subject: [PATCH 5/5] fix browser session management

---
 python/menu-dynamic-extraction-demo/README.md |  2 +-
 python/menu-dynamic-extraction-demo/config.py |  4 -
 python/menu-dynamic-extraction-demo/main.py   | 99 +++++++++----------
 .../pyproject.toml                            |  4 +-
 .../menu-dynamic-extraction-demo/scraper.py   | 93 ++++++++---------
 5 files changed, 87 insertions(+), 115 deletions(-)

diff --git a/python/menu-dynamic-extraction-demo/README.md b/python/menu-dynamic-extraction-demo/README.md
index 09863317..170d1053 100644
--- a/python/menu-dynamic-extraction-demo/README.md
+++ b/python/menu-dynamic-extraction-demo/README.md
@@ -84,7 +84,7 @@ INFO: Session closed successfully
 
 ## COMMON PITFALLS
 
-- "ModuleNotFoundError: No module named 'stagehand'": Ensure you installed dependencies with `uv pip install -e .` or `pip install -e .`
+- "ModuleNotFoundError: No module named 'stagehand'": Ensure you installed dependencies with `uv pip install -e .` or `pip install -e .`. Note: Playwright is not required as Stagehand manages the browser automatically.
 - Missing API keys: Verify .env contains BROWSERBASE_PROJECT_ID, BROWSERBASE_API_KEY, and MODEL_API_KEY
 - "Could not find menu link after multiple attempts": The restaurant website may have an unusual structure. Try manually checking if there's a clear "Menu" link. Increase MAX_RETRIES in config.py if needed.
 - Popup/modal blocking: The script attempts to close popups automatically, but some sites have persistent overlays. Check the Browserbase live view link to debug.
diff --git a/python/menu-dynamic-extraction-demo/config.py b/python/menu-dynamic-extraction-demo/config.py
index 6b624ac2..cd5f805a 100644
--- a/python/menu-dynamic-extraction-demo/config.py
+++ b/python/menu-dynamic-extraction-demo/config.py
@@ -6,7 +6,6 @@
 import os
 import logging
 from dotenv import load_dotenv
-from browserbase import Browserbase
 
 # Load environment variables from .env file
 load_dotenv()
@@ -39,6 +38,3 @@
     format="%(asctime)s %(levelname)s %(name)s: %(message)s",
 )
 logger = logging.getLogger(__name__)
-
-# Initialize Browserbase client
-bb = Browserbase(api_key=BROWSERBASE_API_KEY)
diff --git a/python/menu-dynamic-extraction-demo/main.py b/python/menu-dynamic-extraction-demo/main.py
index 006fed53..f017d550 100644
--- a/python/menu-dynamic-extraction-demo/main.py
+++ b/python/menu-dynamic-extraction-demo/main.py
@@ -15,14 +15,12 @@
     python main.py --batch      # Batch mode - processes URLs from websites.txt
 """
 
-from playwright.sync_api import sync_playwright
 from stagehand import Stagehand
 from config import (
     BROWSERBASE_API_KEY,
     BROWSERBASE_PROJECT_ID,
     MODEL_API_KEY,
     NO_MENU_LINK_FOUND,
-    bb,
     logger
 )
 from models import MENU_SCHEMA
@@ -32,10 +30,6 @@
 
 def main():
     """Main function for interactive single-restaurant extraction."""
-    # Create Browserbase session
-    session = bb.sessions.create(project_id=BROWSERBASE_PROJECT_ID)
-    session_id = session.id
-
     # Initialize Stagehand client
     client = Stagehand(
         browserbase_api_key=BROWSERBASE_API_KEY,
@@ -43,59 +37,56 @@ def main():
         model_api_key=MODEL_API_KEY,
     )
 
+    stagehand_session = client.sessions.start(
+        model_name="google/gemini-2.5-flash",
+    )
+    session_id = stagehand_session.data.session_id
     logger.info(f"Session started: {session_id}")
     logger.info(f"Watch live: https://browserbase.com/sessions/{session_id}")
 
     try:
-        # Connect Playwright to Browserbase
-        with sync_playwright() as p:
-            browser = p.chromium.connect_over_cdp(
-                f"wss://connect.browserbase.com?apiKey={BROWSERBASE_API_KEY}&sessionId={session_id}"
+        # Get website URL from user
+        website_url = normalize_url(get_website_from_user())
+        logger.info(f"Navigating to {website_url} ...")
+
+        # Navigate to website using Stagehand
+        client.sessions.navigate(
+            id=session_id,
+            url=website_url,
+        )
+        
+        # Close any popups
+        close_popups(client, session_id)
+
+        # Locate menu link with retries
+        all_menu_sections = []
+        menu_link = find_menu_link(client, session_id)
+        if menu_link == NO_MENU_LINK_FOUND:
+            logger.error("Could not find menu link after multiple attempts.")
+        else:
+            logger.info(f"Menu link found: {menu_link}")
+
+            # Navigate to menu
+            client.sessions.act(
+                id=session_id,
+                input=f"Click on: {menu_link[0] if isinstance(menu_link, list) else menu_link}",
+            )
+
+            # Find menu subsections
+            sections_response = client.sessions.observe(
+                id=session_id,
+                instruction="Find all subsections on the current menu page, i.e. 'Lunch', 'Dinner', 'Happy Hour', etc. "
+                           "Return them as a list of links. If none found, return the current page link only in a list. "
+                           "Do not return duplicates if a link appears multiple times.",
             )
-            ctx = browser.contexts[0]
-            page = ctx.pages[0] if ctx.pages else ctx.new_page()
-
-            # Get website URL from user
-            website_url = normalize_url(get_website_from_user())
-            logger.info(f"Navigating to {website_url} ...")
-            page.goto(website_url, wait_until="domcontentloaded")
-
-            # Close any popups
-            close_popups(client, session_id)
-
-            # Locate menu link with retries
-            all_menu_sections = []
-            menu_link = find_menu_link(client, session_id)
-            if menu_link == NO_MENU_LINK_FOUND:
-                logger.error("Could not find menu link after multiple attempts.")
-            else:
-                logger.info(f"Menu link found: {menu_link}")
-
-                # Navigate to menu
-                client.sessions.act(
-                    id=session_id,
-                    input=f"Click on: {menu_link[0] if isinstance(menu_link, list) else menu_link}",
-                )
-
-                page.wait_for_load_state("load", timeout=20000)
-
-                # Find menu subsections
-                sections_response = client.sessions.observe(
-                    id=session_id,
-                    instruction="Find all subsections on the current menu page, i.e. 'Lunch', 'Dinner', 'Happy Hour', etc. "
-                               "Return them as a list of links. If none found, return the current page link only in a list. "
-                               "Do not return duplicates if a link appears multiple times.",
-                )
-                sections = sections_response.data.result
-
-                # Extract menu from all sections
-                all_menu_sections = extract_menu_from_sections(client, session_id, page, sections)
-
-            # Save combined menu data to JSON file
-            if all_menu_sections:
-                save_menu_to_json(website_url, all_menu_sections)
-
-            browser.close()
+            sections = sections_response.data.result
+
+            # Extract menu from all sections
+            all_menu_sections = extract_menu_from_sections(client, session_id, sections)
+
+        # Save combined menu data to JSON file
+        if all_menu_sections:
+            save_menu_to_json(website_url, all_menu_sections)
 
     finally:
         # End session
diff --git a/python/menu-dynamic-extraction-demo/pyproject.toml b/python/menu-dynamic-extraction-demo/pyproject.toml
index 262deef4..ebfeafba 100644
--- a/python/menu-dynamic-extraction-demo/pyproject.toml
+++ b/python/menu-dynamic-extraction-demo/pyproject.toml
@@ -5,11 +5,9 @@ description = "Restaurant menu extraction using Stagehand and Browserbase"
 readme = "README.md"
 requires-python = ">=3.9"
 dependencies = [
-    "browserbase>=1.4.0",
-    "playwright>=1.40.0",
     "pydantic>=2.0.0",
     "python-dotenv>=1.2.1",
-    "stagehand>=3.0.0",  # v3 API - pure API client
+    "stagehand>=3.0.0",  # v3 API - manages Browserbase sessions internally
 ]
 
 [project.optional-dependencies]
diff --git a/python/menu-dynamic-extraction-demo/scraper.py b/python/menu-dynamic-extraction-demo/scraper.py
index e3bf9e57..8e0875a3 100644
--- a/python/menu-dynamic-extraction-demo/scraper.py
+++ b/python/menu-dynamic-extraction-demo/scraper.py
@@ -7,7 +7,6 @@
 import logging
 from typing import List, Dict, Any, Optional
 from datetime import datetime
-from playwright.sync_api import sync_playwright, Page
 from stagehand import Stagehand
 from config import (
     BROWSERBASE_API_KEY,
@@ -15,7 +14,6 @@
     MODEL_API_KEY,
     NO_MENU_LINK_FOUND,
     MAX_RETRIES,
-    bb,
     logger
 )
 from models import MENU_SCHEMA
@@ -80,7 +78,6 @@ def find_menu_link(client: Stagehand, session_id: str, max_retries: int = MAX_RE
 def extract_menu_from_sections(
     client: Stagehand,
     session_id: str,
-    page: Page,
     sections: List[Any]
 ) -> List[Dict[str, Any]]:
     """
@@ -89,7 +86,6 @@ def extract_menu_from_sections(
     Args:
         client: Stagehand client instance
         session_id: Active session ID
-        page: Playwright page instance
         sections: List of menu sections to extract
 
     Returns:
@@ -112,8 +108,6 @@ def extract_menu_from_sections(
             input=f"Navigate to: {section_desc}",
         )
 
-        page.wait_for_load_state("load", timeout=20000)
-
         # Extract menu data
         extract_response = client.sessions.extract(
             id=session_id,
@@ -157,10 +151,6 @@ def process_restaurant(website_url: str, agent_id: int) -> Dict[str, Any]:
         "error": None,
     }
 
-    # Create Browserbase session
-    session = bb.sessions.create(project_id=BROWSERBASE_PROJECT_ID)
-    session_id = session.id
-
     # Initialize Stagehand client
     client = Stagehand(
         browserbase_api_key=BROWSERBASE_API_KEY,
@@ -168,57 +158,54 @@ def process_restaurant(website_url: str, agent_id: int) -> Dict[str, Any]:
         model_api_key=MODEL_API_KEY,
     )
 
+    # Start Stagehand session
+    stagehand_session = client.sessions.start(
+        model_name="google/gemini-2.5-flash",
+    )
+    session_id = stagehand_session.data.session_id
+
     agent_logger.info(f"Session started: {session_id}")
     agent_logger.info(f"Watch live: https://browserbase.com/sessions/{session_id}")
 
     try:
-        # Connect Playwright to Browserbase
-        with sync_playwright() as p:
-            browser = p.chromium.connect_over_cdp(
-                f"wss://connect.browserbase.com?apiKey={BROWSERBASE_API_KEY}&sessionId={session_id}"
-            )
-            ctx = browser.contexts[0]
-            page = ctx.pages[0] if ctx.pages else ctx.new_page()
+        # Navigate to website using Stagehand
+        agent_logger.info(f"Navigating to {website_url}")
+        client.sessions.act(
+            id=session_id,
+            input=f"Go to {website_url}",
+        )
 
-            # Navigate to website
-            agent_logger.info(f"Navigating to {website_url}")
-            page.goto(website_url, wait_until="domcontentloaded")
+        # Close any popups on initial page load
+        close_popups(client, session_id, agent_logger)
 
-            # Close any popups on initial page load
+        # Extract menu data
+        all_menu_sections = []
+        menu_link = find_menu_link(client, session_id)
+        if menu_link == NO_MENU_LINK_FOUND:
+            agent_logger.warning("Could not find menu link")
+        else:
+            agent_logger.info(f"Menu link: {menu_link}")
+
+            # Navigate to menu link
+            client.sessions.act(
+                id=session_id,
+                input=f"Click on: {menu_link[0] if isinstance(menu_link, list) else menu_link}",
+            )
+
+            # Close any popups after navigating to menu page
             close_popups(client, session_id, agent_logger)
 
-            # Extract menu data
-            all_menu_sections = []
-            menu_link = find_menu_link(client, session_id)
-            if menu_link == NO_MENU_LINK_FOUND:
-                agent_logger.warning("Could not find menu link")
-            else:
-                agent_logger.info(f"Menu link: {menu_link}")
-
-                # Navigate to menu link
-                client.sessions.act(
-                    id=session_id,
-                    input=f"Click on: {menu_link[0] if isinstance(menu_link, list) else menu_link}",
-                )
-
-                page.wait_for_load_state("load", timeout=20000)
-
-                # Close any popups after navigating to menu page
-                close_popups(client, session_id, agent_logger)
-
-                # Extract menu sections
-                sections_response = client.sessions.observe(
-                    id=session_id,
-                    instruction="Find all subsections on the current menu page, i.e. 'Lunch', 'Dinner', 'Happy Hour', etc. "
-                               "Return them as a list of links. If none found, return the current page link only in a list. "
-                               "Do not return duplicates if a link appears multiple times.",
-                )
-                sections = sections_response.data.result
-
-                # Extract menu from all sections
-                all_menu_sections = extract_menu_from_sections(client, session_id, page, sections)
-
-            browser.close()
+            # Extract menu sections
+            sections_response = client.sessions.observe(
+                id=session_id,
+                instruction="Find all subsections on the current menu page, i.e. 'Lunch', 'Dinner', 'Happy Hour', etc. "
+                           "Return them as a list of links. If none found, return the current page link only in a list. "
+                           "Do not return duplicates if a link appears multiple times.",
+            )
+            sections = sections_response.data.result
+
+            # Extract menu from all sections
+            all_menu_sections = extract_menu_from_sections(client, session_id, sections)
 
         result["status"] = "success"
         end_time = datetime.now()