diff --git a/docs/conf.py b/docs/conf.py
index 128c1777..e3d78cff 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -270,6 +270,8 @@
     # "llm_important_pages": "index, notes/cuda",
     # "enable_navbar_dropdowns": False,
     "pytorch_project": "docs",
+    # llms.txt configuration
+    "llm_deduplicate_titles": "true",  # Enable title deduplication
     # "show_lf_header": False,
     # "show_lf_footer": False,
     # RunLLM Widget Configuration (uncomment and set assistant_id to enable)
diff --git a/pytorch_sphinx_theme2/__init__.py b/pytorch_sphinx_theme2/__init__.py
index d647d68a..ed6e9db9 100644
--- a/pytorch_sphinx_theme2/__init__.py
+++ b/pytorch_sphinx_theme2/__init__.py
@@ -203,16 +203,17 @@ def _generate_llms_txt(app, exception):
 
     The file is resolved in this order:
 
-    1. **Explicit option** — ``llm_custom_file`` theme option pointing to a file
+    1. **Explicit disable** — ``llm_disabled = "true"`` skips generation entirely.
+    2. **Custom file** — ``llm_custom_file`` theme option pointing to a file
        relative to the Sphinx source directory.
-    2. **Convention** — A file named ``llms.txt`` in the Sphinx source root.
-    3. **Auto-generation** — A simple page listing following the Hugging Face
-       style, with URLs resolved as:
+    3. **Convention** — A file named ``llms.txt`` in the Sphinx source root.
+    4. **Auto-generation** — A simple page listing following the llms.txt spec,
+       with URLs resolved as:
        a. ``llm_domain`` + ``llm_base_path`` theme options → fully constructed URLs
        b. Sphinx ``html_baseurl`` config → baseurl + relative path
        c. Relative URLs as a last resort
 
-    Opt-in: set ``llm_disabled = false`` in html_theme_options to enable.
+    Enabled by default. Set ``llm_disabled = "true"`` to disable.
     """
     if exception is not None:
         return  # Don't generate if build failed
@@ -220,9 +221,9 @@ def _generate_llms_txt(app, exception):
     if app.builder.name != "html":
         return
 
-    # Disabled by default; opt-in with llm_disabled = false
+    # Enabled by default; opt-out with llm_disabled = "true"
     theme_options = app.config.html_theme_options or {}
-    if str(theme_options.get("llm_disabled", "true")).lower() == "true":
+    if str(theme_options.get("llm_disabled", "false")).lower() == "true":
         return
 
     dest_path = Path(app.outdir) / "llms.txt"
@@ -286,17 +287,63 @@ def make_url(relative_path):
             # Build the URL
             url = make_url(docname + ".html")
 
-            docs.append({"title": str(title), "url": url})
+            docs.append({"title": str(title), "url": url, "docname": docname})
 
     except Exception as e:
         print(f"Warning: Could not discover pages for llms.txt: {e}")
 
-    # Build the llms.txt content in Hugging Face style
+    # Deduplicate titles if enabled.
+    # Pages that share a title get a disambiguating suffix taken from their docname.
+    deduplicate = (
+        str(theme_options.get("llm_deduplicate_titles", "false")).lower() == "true"
+    )
+    if deduplicate:
+        # Count titles, and (title, short suffix) pairs, before changing anything
+        # so that the renames below cannot skew the counts
+        title_counts = {}
+        label_counts = {}
+        for doc in docs:
+            label = (doc["title"], doc["docname"].split("/")[-1])
+            title_counts[doc["title"]] = title_counts.get(doc["title"], 0) + 1
+            label_counts[label] = label_counts.get(label, 0) + 1
+
+        for doc in docs:
+            base_title = doc["title"]
+            if title_counts[base_title] <= 1:
+                continue
+
+            # Prefer the last path component of the docname,
+            # e.g., "generated/torch.nn.GRU" -> "torch.nn.GRU"
+            docname = doc["docname"]
+            suffix = docname.split("/")[-1]
+
+            # If the short suffix is shared too (e.g., "a/index" and "b/index"),
+            # fall back to the full docname so the entries stay distinguishable
+            if label_counts[(base_title, suffix)] > 1:
+                suffix = docname
+
+            # Only add suffix if it's different from the title
+            if suffix.lower() != base_title.lower():
+                doc["title"] = f"{base_title} ({suffix})"
+
+    # Build the llms.txt content following the llms.txt spec
     lines = []
 
     # Header
     lines.append(f"# {project}")
     lines.append("")
+
+    # Quote block with project description (for spec compliance)
+    # If llm_description is set, use it. Otherwise, generate a generic one from project name.
+    # Whitespace is collapsed so a multi-line value still fits on the single "> " line.
+    llm_description = " ".join(str(theme_options.get("llm_description") or "").split())
+    if not llm_description:
+        # Generic fallback using Sphinx project name
+        llm_description = f"{project} documentation."
+
+    lines.append(f"> {llm_description}")
+    lines.append("")
+
     lines.append("## Docs")
     lines.append("")
 
diff --git a/pytorch_sphinx_theme2/theme.conf b/pytorch_sphinx_theme2/theme.conf
index 5c0a0f73..11fbb626 100644
--- a/pytorch_sphinx_theme2/theme.conf
+++ b/pytorch_sphinx_theme2/theme.conf
@@ -59,12 +59,15 @@ llm_domain =
 # Base path after domain (e.g., "docs/", "vision/", "audio/")
 # Combined with domain and version to form full URLs: https://{domain}/{base_path}{version}/
 llm_base_path =
-# Description of the site for LLMs (appears in llm:description meta tag)
+# Description of the site for LLMs (appears in llm:description meta tag and llms.txt quote block)
 llm_description =
-# Set to false to enable llms.txt generation
+# Set to true to disable llms.txt generation (enabled by default)
 # When enabled, URLs are resolved: llm_domain > html_baseurl > relative
-llm_disabled = true
+llm_disabled = false
 # Path to a custom llms.txt file (relative to Sphinx source directory).
 # When set, this file is copied to the output instead of auto-generating one.
 # If not set, a file named llms.txt in the source root is used automatically.
 llm_custom_file =
+# Set to true to add disambiguating suffixes to duplicate titles
+# e.g., two pages titled "GRU" become "GRU (torch.nn.GRU)" and "GRU (torch.ao.nn.quantized.dynamic.GRU)"
+llm_deduplicate_titles = false