From d681cba590ac32d33e24aeba62c3d8a2c06f94de Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 16 Dec 2025 16:05:23 +0000 Subject: [PATCH 1/4] Add BibTeX entry support for figure metadata - Add :bib: directive option to specify/generate BibTeX keys - Extract figure metadata (author, date, license, source) from existing bib entries - Generate BibTeX entries from figure metadata when bib.generate is enabled - Add new configuration options under metadata_figure_settings.bib: - generate: Enable automatic bib entry generation - output_file: Output path for generated .bib file - entry_type: BibTeX entry type (default: misc) - key_prefix: Prefix for auto-generated keys - extract_metadata: Enable metadata extraction from bib entries - Update README with bib configuration and usage documentation --- README.md | 72 ++++++ src/sphinx_metadata_figure/__init__.py | 326 ++++++++++++++++++++++++- 2 files changed, 389 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index abf816e..eb27e38 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,32 @@ The `copyright` key contains options for how to handle copyright metadata. The `source` key contains options for how to handle source metadata. - `warn_missing`: If `true`, a warning will be generated for each figure without source information. +### Bib + +The `bib` key contains options for BibTeX entry support. This allows you to: +1. Generate BibTeX entries from figure metadata +2. Extract figure metadata from existing BibTeX entries + +Configuration options: +- `generate`: If `true`, BibTeX entries will be generated from figure metadata and written to a `.bib` file after the build completes. Default: `false`. +- `output_file`: The filename for the generated BibTeX file (relative to the output directory). Default: `_figure_metadata.bib`. +- `entry_type`: The BibTeX entry type to use for generated entries. Default: `misc`. +- `key_prefix`: Prefix for auto-generated BibTeX keys when no explicit `:bib:` key is provided. Default: `fig:`. +- `extract_metadata`: If `true`, metadata will be extracted from existing BibTeX entries when the `:bib:` option references a valid key. Default: `true`. + +Example configuration: +```yaml +sphinx: + config: + metadata_figure_settings: + bib: + generate: true + output_file: figures.bib + entry_type: misc + key_prefix: fig: + extract_metadata: true +``` + ## Usage The figure directive and the [MyST-NB sphinx extension's `glue:figure` directive](https://myst-nb.readthedocs.io/en/latest/render/glue.html#the-glue-figure-directive) are extended with the following options to add metadata: @@ -171,6 +197,52 @@ The figure directive and the [MyST-NB sphinx extension's `glue:figure` directive - `admonition_class`: - Optionally override the global `admonition_class` setting for this figure only. - Only relevant if `placement` is `admonition` or `margin`. +- `bib`: + - Optionally specify a BibTeX key for this figure. + - When specified with an existing key in your `.bib` files, metadata (author, date, source, license) will be extracted from the bib entry. + - When `bib.generate` is enabled in configuration, a BibTeX entry will be generated for this figure using the specified key (or an auto-generated key if not provided). + - Explicit metadata options (`:author:`, `:license:`, etc.) take precedence over extracted bib metadata. + +### BibTeX Integration Examples + +**Extract metadata from existing bib entry:** +```rst +.. figure:: images/diagram.png + :bib: smith2024diagram + + A diagram from Smith's paper +``` + +**Generate bib entries from figure metadata:** +```yaml +# In _config.yml +sphinx: + config: + metadata_figure_settings: + bib: + generate: true +``` + +```rst +.. figure:: images/photo.jpg + :author: John Doe + :license: CC-BY + :date: 2024-06-15 + :bib: my_photo_key + + A photograph by John Doe +``` + +This will generate a BibTeX entry like: +```bibtex +@misc{my_photo_key, + author = {John Doe}, + title = {A photograph by John Doe}, + year = {2024}, + date = {2024-06-15}, + note = {License: CC-BY} +} +``` ## Documentation diff --git a/src/sphinx_metadata_figure/__init__.py b/src/sphinx_metadata_figure/__init__.py index 2cb4731..17f0e40 100644 --- a/src/sphinx_metadata_figure/__init__.py +++ b/src/sphinx_metadata_figure/__init__.py @@ -63,13 +63,21 @@ METADATA_FIGURE_DEFAULTS_SOURCE = { 'warn_missing' : False } +METADATA_FIGURE_DEFAULTS_BIB = { + 'generate': False, # Generate bib entries from figure metadata + 'output_file': '_figure_metadata.bib', # Where to write generated entries + 'entry_type': 'misc', # BibTeX entry type for figures + 'key_prefix': 'fig:', # Prefix for auto-generated bib keys + 'extract_metadata': True, # Extract metadata from bib entries when :bib: is specified +} METADATA_FIGURE_DEFAULTS = { 'style': METADATA_FIGURE_DEFAULTS_STYLE, 'license': METADATA_FIGURE_DEFAULTS_LICENSE, 'author': METADATA_FIGURE_DEFAULTS_AUTHOR, 'date': METADATA_FIGURE_DEFAULTS_DATE, 'copyright': METADATA_FIGURE_DEFAULTS_COPYRIGHT, - 'source': METADATA_FIGURE_DEFAULTS_SOURCE + 'source': METADATA_FIGURE_DEFAULTS_SOURCE, + 'bib': METADATA_FIGURE_DEFAULTS_BIB, } # List of valid licenses @@ -112,6 +120,186 @@ 'Pexels License': 'https://www.pexels.com/license/', } +# Storage for generated bib entries (populated during build) +_generated_bib_entries = {} + + +def _sanitize_bib_key(key): + """Sanitize a string to be a valid BibTeX key.""" + import re + # Replace invalid characters with underscores + sanitized = re.sub(r'[^a-zA-Z0-9_:-]', '_', key) + # Ensure it starts with a letter + if sanitized and not sanitized[0].isalpha(): + sanitized = 'fig_' + sanitized + return sanitized + + +def _generate_bib_key(figure_path, prefix='fig:'): + """Generate a BibTeX key from the figure path.""" + # Use the figure filename without extension as base + base = os.path.splitext(os.path.basename(figure_path))[0] + return _sanitize_bib_key(prefix + base) + + +def _metadata_to_bib_entry(key, metadata, entry_type='misc'): + """ + Convert figure metadata to a BibTeX entry string. + + Args: + key: The BibTeX key for this entry + metadata: Dict with keys like 'author', 'date', 'title', 'license', 'source', 'copyright' + entry_type: The BibTeX entry type (default: 'misc') + + Returns: + str: A formatted BibTeX entry + """ + lines = [f'@{entry_type}{{{key},'] + + if metadata.get('author'): + lines.append(f' author = {{{metadata["author"]}}},') + + if metadata.get('title'): + lines.append(f' title = {{{metadata["title"]}}},') + + if metadata.get('date'): + # Extract year from YYYY-MM-DD format + try: + year = metadata['date'].split('-')[0] + lines.append(f' year = {{{year}}},') + # Also include full date as custom field + lines.append(f' date = {{{metadata["date"]}}},') + except (ValueError, IndexError): + lines.append(f' date = {{{metadata["date"]}}},') + + if metadata.get('license'): + lines.append(f' note = {{License: {metadata["license"]}}},') + + if metadata.get('source'): + source = metadata['source'] + # Handle markdown link format [text](url) + if '](http' in source: + # Extract URL from markdown link + url = source.split('](')[1].rstrip(')') + lines.append(f' url = {{{url}}},') + lines.append(f' howpublished = {{\\url{{{url}}}}},') + elif source.startswith('http'): + lines.append(f' url = {{{source}}},') + lines.append(f' howpublished = {{\\url{{{source}}}}},') + else: + lines.append(f' howpublished = {{{source}}},') + + if metadata.get('copyright'): + lines.append(f' copyright = {{{metadata["copyright"]}}},') + + # Remove trailing comma from last field + if lines[-1].endswith(','): + lines[-1] = lines[-1][:-1] + + lines.append('}') + return '\n'.join(lines) + + +def _parse_bib_entry(bib_content, key): + """ + Parse a BibTeX entry and extract metadata fields. + + Args: + bib_content: The full content of a .bib file + key: The BibTeX key to look up + + Returns: + dict: Extracted metadata or None if not found + """ + import re + + # Find the entry with the given key + # Pattern matches @type{key, ... } + pattern = rf'@\w+\s*\{{\s*{re.escape(key)}\s*,([^@]*?)\}}\s*(?=@|\Z)' + match = re.search(pattern, bib_content, re.DOTALL | re.IGNORECASE) + + if not match: + return None + + entry_content = match.group(1) + metadata = {} + + # Extract fields - pattern matches field = {value} or field = "value" + field_pattern = r'(\w+)\s*=\s*(?:\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}|"([^"]*)")' + + for field_match in re.finditer(field_pattern, entry_content, re.DOTALL): + field_name = field_match.group(1).lower() + field_value = field_match.group(2) or field_match.group(3) + field_value = field_value.strip() + + if field_name == 'author': + metadata['author'] = field_value + elif field_name == 'title': + metadata['title'] = field_value + elif field_name == 'year': + # Convert year to date format + if 'date' not in metadata: + metadata['date'] = f'{field_value}-01-01' + elif field_name == 'date': + metadata['date'] = field_value + elif field_name == 'url': + metadata['source'] = field_value + elif field_name == 'howpublished': + # Extract URL from \url{...} if present + url_match = re.search(r'\\url\{([^}]+)\}', field_value) + if url_match: + metadata['source'] = url_match.group(1) + elif 'source' not in metadata: + metadata['source'] = field_value + elif field_name == 'note': + # Try to extract license from note field + license_match = re.search(r'License:\s*(.+)', field_value, re.IGNORECASE) + if license_match: + metadata['license'] = license_match.group(1).strip() + elif field_name == 'copyright': + metadata['copyright'] = field_value + + return metadata if metadata else None + + +def _load_bib_files(app): + """ + Load all .bib files configured in sphinxcontrib-bibtex or in source directory. + + Returns: + str: Combined content of all bib files + """ + bib_content = '' + + # Try to get bib files from sphinxcontrib-bibtex configuration + bibtex_files = getattr(app.config, 'bibtex_bibfiles', []) + + # Also search for .bib files in the source directory + srcdir = app.srcdir + for bib_file in bibtex_files: + bib_path = os.path.join(srcdir, bib_file) + if os.path.exists(bib_path): + try: + with open(bib_path, 'r', encoding='utf-8') as f: + bib_content += f.read() + '\n' + except Exception as e: + logger.debug(f'Could not read bib file {bib_path}: {e}') + + # Search for any .bib files in source directory if none configured + if not bib_content: + for root, dirs, files in os.walk(srcdir): + for file in files: + if file.endswith('.bib'): + bib_path = os.path.join(root, file) + try: + with open(bib_path, 'r', encoding='utf-8') as f: + bib_content += f.read() + '\n' + except Exception as e: + logger.debug(f'Could not read bib file {bib_path}: {e}') + + return bib_content + + class MetadataFigure(Figure): """ Enhanced figure directive with metadata support. @@ -136,6 +324,8 @@ class MetadataFigure(Figure): 'show': directives.unchanged, # comma-separated: author,license,date 'admonition_title': directives.unchanged, # admonition title (default: Attribution) 'admonition_class': directives.unchanged, # extra classes for admonition + # Bib entry support + 'bib': directives.unchanged, # BibTeX key to use/generate for this figure }) def run(self): @@ -156,8 +346,27 @@ def run(self): for key in METADATA_FIGURE_DEFAULTS: settings[key] = METADATA_FIGURE_DEFAULTS[key] | user_settings.get(key, {}) - # Validate license - license_value = self.options.get('license', None) + # Handle bib entry extraction - extract metadata from bib entry if :bib: is specified + bib_key = self.options.get('bib', None) + bib_settings = settings['bib'] + bib_metadata = {} + + if bib_key and bib_settings['extract_metadata'] and env: + # Load bib files and try to extract metadata + bib_content = _load_bib_files(env.app) + if bib_content: + extracted = _parse_bib_entry(bib_content, bib_key) + if extracted: + bib_metadata = extracted + logger.debug(f'Extracted metadata from bib entry "{bib_key}": {extracted}') + else: + logger.warning( + f'BibTeX key "{bib_key}" not found in any .bib files', + location=(self.state.document.current_source, self.lineno) + ) + + # Validate license (explicit option > bib metadata > defaults) + license_value = self.options.get('license', None) or bib_metadata.get('license', None) license_settings = settings['license'] if not license_value: if license_settings['substitute_missing']: @@ -193,8 +402,8 @@ def run(self): location=(self.state.document.current_source, self.lineno) ) - # Validate date format (optional) - date_value = self.options.get('date',None) + # Validate date format (explicit option > bib metadata > defaults) + date_value = self.options.get('date', None) or bib_metadata.get('date', None) if not date_value: date_settings = settings['date'] if date_settings['substitute_missing']: @@ -214,7 +423,8 @@ def run(self): location=(self.state.document.current_source, self.lineno) ) - author_value = self.options.get('author',None) + # Author value (explicit option > bib metadata > defaults) + author_value = self.options.get('author', None) or bib_metadata.get('author', None) if not author_value: author_settings = settings['author'] if author_settings['substitute_missing']: @@ -224,7 +434,8 @@ def run(self): else: author_value = default_author - copyright_value = self.options.get('copyright', None) + # Copyright value (explicit option > bib metadata > defaults) + copyright_value = self.options.get('copyright', None) or bib_metadata.get('copyright', None) if not copyright_value: copyright_settings = settings['copyright'] if copyright_settings['substitute_missing']: @@ -267,7 +478,8 @@ def run(self): else: copyright_value = default_copyright - source_value = self.options.get('source', None) + # Source value (explicit option > bib metadata) + source_value = self.options.get('source', None) or bib_metadata.get('source', None) source_settings = settings['source'] if source_value is None: if source_settings['warn_missing']: @@ -336,6 +548,44 @@ def run(self): if source_value: figure_node['source'] = source_value + # Generate bib entry if enabled + if bib_settings['generate']: + # Determine bib key: use explicit :bib: value, or auto-generate from figure path + figure_path = self.arguments[0] if self.arguments else 'unknown' + generated_bib_key = bib_key if bib_key else _generate_bib_key( + figure_path, bib_settings['key_prefix'] + ) + + # Get title from figure caption if available + figure_title = None + for child in figure_node.children: + if isinstance(child, nodes.caption): + figure_title = child.astext() + break + if not figure_title: + figure_title = bib_metadata.get('title', figure_path) + + # Build metadata dict for bib entry + bib_entry_metadata = { + 'author': author_value, + 'title': figure_title, + 'date': date_value, + 'license': license_value, + 'source': source_value, + 'copyright': copyright_value, + } + + # Generate and store the bib entry + bib_entry = _metadata_to_bib_entry( + generated_bib_key, + bib_entry_metadata, + bib_settings['entry_type'] + ) + _generated_bib_entries[generated_bib_key] = bib_entry + + # Store bib key on figure node for reference + figure_node['bib_key'] = generated_bib_key + # Determine rendering controls style_settings = settings['style'] placement = self.options.get('placement') or style_settings['placement'] @@ -507,6 +757,61 @@ def check_all_figures_have_license(app, env): logger.warning(f' - {docname}: {image_uri}') +def write_bib_file(app, exc): + """ + Write generated bib entries to a file after build completes. + + This function is called via the build-finished event and writes all + accumulated bib entries to a single .bib file. + + Args: + app: Sphinx application instance + exc: Exception raised during build, or None if successful + """ + global _generated_bib_entries + + if exc is not None: + # Build failed, don't write bib file + return + + if not _generated_bib_entries: + # No bib entries generated + return + + # Get settings + user_settings = getattr(app.config, 'metadata_figure_settings', {}) if app else {} + settings = {} + for key in METADATA_FIGURE_DEFAULTS: + settings[key] = METADATA_FIGURE_DEFAULTS[key] | user_settings.get(key, {}) + + bib_settings = settings['bib'] + if not bib_settings['generate']: + return + + # Determine output path + output_file = bib_settings['output_file'] + if not os.path.isabs(output_file): + output_path = os.path.join(app.outdir, output_file) + else: + output_path = output_file + + # Write all bib entries to the file + try: + with open(output_path, 'w', encoding='utf-8') as f: + f.write('% Auto-generated BibTeX entries from figure metadata\n') + f.write(f'% Generated by sphinx-metadata-figure extension\n') + f.write(f'% Total entries: {len(_generated_bib_entries)}\n\n') + for key, entry in sorted(_generated_bib_entries.items()): + f.write(entry) + f.write('\n\n') + logger.info(f'Generated bib file with {len(_generated_bib_entries)} entries: {output_path}') + except Exception as e: + logger.warning(f'Failed to write bib file {output_path}: {e}') + + # Clear entries for next build + _generated_bib_entries.clear() + + def setup(app): """ Setup function for the Sphinx extension. @@ -532,7 +837,10 @@ def setup(app): # Add custom CSS for metadata styling app.add_css_file('metadata_figure.css') app.connect("build-finished", copy_asset_files) - + + # Generate bib file after build if enabled + app.connect("build-finished", write_bib_file) + # Register event handler to check all figures after build app.connect('env-updated', check_all_figures_have_license) From 474b76ab0c1c68ddcc8042442e33cf6b78f8074e Mon Sep 17 00:00:00 2001 From: Dennis den Ouden-van der Horst Date: Fri, 19 Dec 2025 16:26:52 +0100 Subject: [PATCH 2/4] Remove BibTeX generation and update documentation Eliminates support for generating BibTeX entries from figure metadata, including related configuration options and code paths. Updates the README and MANUAL to reflect that only extraction of metadata from existing BibTeX entries is supported. Cleans up unused code and clarifies documentation for BibTeX integration. --- MANUAL.ipynb | 44 +++++- README.md | 65 +------- src/sphinx_metadata_figure/__init__.py | 199 ++----------------------- 3 files changed, 55 insertions(+), 253 deletions(-) diff --git a/MANUAL.ipynb b/MANUAL.ipynb index b80d286..d36773a 100644 --- a/MANUAL.ipynb +++ b/MANUAL.ipynb @@ -418,8 +418,50 @@ "\n", "::::{include} README.md\n", ":start-after: \"\"\n", - "::::\n" + "::::\n", + "\n", + "\n", + "### Example 11: Metadata with BibTeX extraction\n", + "\n", + "````md\n", + "```{figure} /images/TeachBooks_logo.svg\n", + ":name: tb_logo_metadata11\n", + ":width: 50%\n", + ":bib: TeachBooksLogo\n", + "\n", + "The logo of TeachBooks.\n", + "```\n", + "````\n", + "\n", + "```{figure} /images/TeachBooks_logo.svg\n", + ":name: tb_logo_metadata11\n", + ":width: 50%\n", + ":bib: TeachBooksLogo\n", + "\n", + "The logo of TeachBooks.\n", + "```\n", + "\n", + "The corresponding BibTeX entry in a `.bib` file would be:\n", + "\n", + "````{bibtex}\n", + "@misc{TeachBooksLogo,\n", + " author = {Veronica Comin},\n", + " title = {The logo of TeachBooks.},\n", + " year = {2024},\n", + " date = {2024-11-13},\n", + " note = {License: CC-BY},\n", + " url = {https://github.com/TeachBooks/logos_and_visualisations},\n", + " howpublished = {\\url{https://github.com/TeachBooks/logos_and_visualisations}},\n", + " copyright = {© TeachBooks 2024}\n", + "}\n", + "````" ] + }, + { + "cell_type": "markdown", + "id": "8493ba3e", + "metadata": {}, + "source": [] } ], "metadata": { diff --git a/README.md b/README.md index eb27e38..70ccb80 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,8 @@ sphinx: default_copyright: authoryear source: warn_missing: false + bib: + extract_metadata: true ``` Each of the level 1 keys in `metadata_figure_settings` must be a dictionary of key-value pairs. Each level 1 ley will be discussed next, including the options. @@ -138,30 +140,11 @@ The `source` key contains options for how to handle source metadata. ### Bib -The `bib` key contains options for BibTeX entry support. This allows you to: -1. Generate BibTeX entries from figure metadata -2. Extract figure metadata from existing BibTeX entries +The `bib` key contains options for BibTeX entry support. This allows you to extract figure metadata from existing BibTeX entries. Configuration options: -- `generate`: If `true`, BibTeX entries will be generated from figure metadata and written to a `.bib` file after the build completes. Default: `false`. -- `output_file`: The filename for the generated BibTeX file (relative to the output directory). Default: `_figure_metadata.bib`. -- `entry_type`: The BibTeX entry type to use for generated entries. Default: `misc`. -- `key_prefix`: Prefix for auto-generated BibTeX keys when no explicit `:bib:` key is provided. Default: `fig:`. - `extract_metadata`: If `true`, metadata will be extracted from existing BibTeX entries when the `:bib:` option references a valid key. Default: `true`. -Example configuration: -```yaml -sphinx: - config: - metadata_figure_settings: - bib: - generate: true - output_file: figures.bib - entry_type: misc - key_prefix: fig: - extract_metadata: true -``` - ## Usage The figure directive and the [MyST-NB sphinx extension's `glue:figure` directive](https://myst-nb.readthedocs.io/en/latest/render/glue.html#the-glue-figure-directive) are extended with the following options to add metadata: @@ -200,50 +183,8 @@ The figure directive and the [MyST-NB sphinx extension's `glue:figure` directive - `bib`: - Optionally specify a BibTeX key for this figure. - When specified with an existing key in your `.bib` files, metadata (author, date, source, license) will be extracted from the bib entry. - - When `bib.generate` is enabled in configuration, a BibTeX entry will be generated for this figure using the specified key (or an auto-generated key if not provided). - Explicit metadata options (`:author:`, `:license:`, etc.) take precedence over extracted bib metadata. -### BibTeX Integration Examples - -**Extract metadata from existing bib entry:** -```rst -.. figure:: images/diagram.png - :bib: smith2024diagram - - A diagram from Smith's paper -``` - -**Generate bib entries from figure metadata:** -```yaml -# In _config.yml -sphinx: - config: - metadata_figure_settings: - bib: - generate: true -``` - -```rst -.. figure:: images/photo.jpg - :author: John Doe - :license: CC-BY - :date: 2024-06-15 - :bib: my_photo_key - - A photograph by John Doe -``` - -This will generate a BibTeX entry like: -```bibtex -@misc{my_photo_key, - author = {John Doe}, - title = {A photograph by John Doe}, - year = {2024}, - date = {2024-06-15}, - note = {License: CC-BY} -} -``` - ## Documentation Further documentation for this extension is available in the [TeachBooks manual](https://teachbooks.io/manual/_git/github.com_TeachBooks_Sphinx-Metadata-Figure/main/MANUAL.html). diff --git a/src/sphinx_metadata_figure/__init__.py b/src/sphinx_metadata_figure/__init__.py index 17f0e40..934d8fe 100644 --- a/src/sphinx_metadata_figure/__init__.py +++ b/src/sphinx_metadata_figure/__init__.py @@ -25,8 +25,6 @@ from sphinx.writers.html import HTMLTranslator -from docutils import nodes - from sphinx.locale import get_translation MESSAGE_CATALOG_NAME = "sphinx_metadata_figure" translate = get_translation(MESSAGE_CATALOG_NAME) @@ -64,10 +62,6 @@ 'warn_missing' : False } METADATA_FIGURE_DEFAULTS_BIB = { - 'generate': False, # Generate bib entries from figure metadata - 'output_file': '_figure_metadata.bib', # Where to write generated entries - 'entry_type': 'misc', # BibTeX entry type for figures - 'key_prefix': 'fig:', # Prefix for auto-generated bib keys 'extract_metadata': True, # Extract metadata from bib entries when :bib: is specified } METADATA_FIGURE_DEFAULTS = { @@ -120,86 +114,6 @@ 'Pexels License': 'https://www.pexels.com/license/', } -# Storage for generated bib entries (populated during build) -_generated_bib_entries = {} - - -def _sanitize_bib_key(key): - """Sanitize a string to be a valid BibTeX key.""" - import re - # Replace invalid characters with underscores - sanitized = re.sub(r'[^a-zA-Z0-9_:-]', '_', key) - # Ensure it starts with a letter - if sanitized and not sanitized[0].isalpha(): - sanitized = 'fig_' + sanitized - return sanitized - - -def _generate_bib_key(figure_path, prefix='fig:'): - """Generate a BibTeX key from the figure path.""" - # Use the figure filename without extension as base - base = os.path.splitext(os.path.basename(figure_path))[0] - return _sanitize_bib_key(prefix + base) - - -def _metadata_to_bib_entry(key, metadata, entry_type='misc'): - """ - Convert figure metadata to a BibTeX entry string. - - Args: - key: The BibTeX key for this entry - metadata: Dict with keys like 'author', 'date', 'title', 'license', 'source', 'copyright' - entry_type: The BibTeX entry type (default: 'misc') - - Returns: - str: A formatted BibTeX entry - """ - lines = [f'@{entry_type}{{{key},'] - - if metadata.get('author'): - lines.append(f' author = {{{metadata["author"]}}},') - - if metadata.get('title'): - lines.append(f' title = {{{metadata["title"]}}},') - - if metadata.get('date'): - # Extract year from YYYY-MM-DD format - try: - year = metadata['date'].split('-')[0] - lines.append(f' year = {{{year}}},') - # Also include full date as custom field - lines.append(f' date = {{{metadata["date"]}}},') - except (ValueError, IndexError): - lines.append(f' date = {{{metadata["date"]}}},') - - if metadata.get('license'): - lines.append(f' note = {{License: {metadata["license"]}}},') - - if metadata.get('source'): - source = metadata['source'] - # Handle markdown link format [text](url) - if '](http' in source: - # Extract URL from markdown link - url = source.split('](')[1].rstrip(')') - lines.append(f' url = {{{url}}},') - lines.append(f' howpublished = {{\\url{{{url}}}}},') - elif source.startswith('http'): - lines.append(f' url = {{{source}}},') - lines.append(f' howpublished = {{\\url{{{source}}}}},') - else: - lines.append(f' howpublished = {{{source}}},') - - if metadata.get('copyright'): - lines.append(f' copyright = {{{metadata["copyright"]}}},') - - # Remove trailing comma from last field - if lines[-1].endswith(','): - lines[-1] = lines[-1][:-1] - - lines.append('}') - return '\n'.join(lines) - - def _parse_bib_entry(bib_content, key): """ Parse a BibTeX entry and extract metadata fields. @@ -230,7 +144,8 @@ def _parse_bib_entry(bib_content, key): for field_match in re.finditer(field_pattern, entry_content, re.DOTALL): field_name = field_match.group(1).lower() field_value = field_match.group(2) or field_match.group(3) - field_value = field_value.strip() + if field_value: + field_value = field_value.strip() if field_name == 'author': metadata['author'] = field_value @@ -351,6 +266,7 @@ def run(self): bib_settings = settings['bib'] bib_metadata = {} + # Check if an existing bibtex key is given if bib_key and bib_settings['extract_metadata'] and env: # Load bib files and try to extract metadata bib_content = _load_bib_files(env.app) @@ -358,12 +274,6 @@ def run(self): extracted = _parse_bib_entry(bib_content, bib_key) if extracted: bib_metadata = extracted - logger.debug(f'Extracted metadata from bib entry "{bib_key}": {extracted}') - else: - logger.warning( - f'BibTeX key "{bib_key}" not found in any .bib files', - location=(self.state.document.current_source, self.lineno) - ) # Validate license (explicit option > bib metadata > defaults) license_value = self.options.get('license', None) or bib_metadata.get('license', None) @@ -548,44 +458,6 @@ def run(self): if source_value: figure_node['source'] = source_value - # Generate bib entry if enabled - if bib_settings['generate']: - # Determine bib key: use explicit :bib: value, or auto-generate from figure path - figure_path = self.arguments[0] if self.arguments else 'unknown' - generated_bib_key = bib_key if bib_key else _generate_bib_key( - figure_path, bib_settings['key_prefix'] - ) - - # Get title from figure caption if available - figure_title = None - for child in figure_node.children: - if isinstance(child, nodes.caption): - figure_title = child.astext() - break - if not figure_title: - figure_title = bib_metadata.get('title', figure_path) - - # Build metadata dict for bib entry - bib_entry_metadata = { - 'author': author_value, - 'title': figure_title, - 'date': date_value, - 'license': license_value, - 'source': source_value, - 'copyright': copyright_value, - } - - # Generate and store the bib entry - bib_entry = _metadata_to_bib_entry( - generated_bib_key, - bib_entry_metadata, - bib_settings['entry_type'] - ) - _generated_bib_entries[generated_bib_key] = bib_entry - - # Store bib key on figure node for reference - figure_node['bib_key'] = generated_bib_key - # Determine rendering controls style_settings = settings['style'] placement = self.options.get('placement') or style_settings['placement'] @@ -756,61 +628,11 @@ def check_all_figures_have_license(app, env): for docname, image_uri in unrecognized_licenses: logger.warning(f' - {docname}: {image_uri}') - -def write_bib_file(app, exc): - """ - Write generated bib entries to a file after build completes. - - This function is called via the build-finished event and writes all - accumulated bib entries to a single .bib file. - - Args: - app: Sphinx application instance - exc: Exception raised during build, or None if successful - """ - global _generated_bib_entries - - if exc is not None: - # Build failed, don't write bib file - return - - if not _generated_bib_entries: - # No bib entries generated - return - - # Get settings - user_settings = getattr(app.config, 'metadata_figure_settings', {}) if app else {} - settings = {} - for key in METADATA_FIGURE_DEFAULTS: - settings[key] = METADATA_FIGURE_DEFAULTS[key] | user_settings.get(key, {}) - - bib_settings = settings['bib'] - if not bib_settings['generate']: - return - - # Determine output path - output_file = bib_settings['output_file'] - if not os.path.isabs(output_file): - output_path = os.path.join(app.outdir, output_file) - else: - output_path = output_file - - # Write all bib entries to the file - try: - with open(output_path, 'w', encoding='utf-8') as f: - f.write('% Auto-generated BibTeX entries from figure metadata\n') - f.write(f'% Generated by sphinx-metadata-figure extension\n') - f.write(f'% Total entries: {len(_generated_bib_entries)}\n\n') - for key, entry in sorted(_generated_bib_entries.items()): - f.write(entry) - f.write('\n\n') - logger.info(f'Generated bib file with {len(_generated_bib_entries)} entries: {output_path}') - except Exception as e: - logger.warning(f'Failed to write bib file {output_path}: {e}') - - # Clear entries for next build - _generated_bib_entries.clear() - +def _resolve_bib_output_path(app, output_file: str) -> str: + """Resolve bib output path consistently against the source directory.""" + if os.path.isabs(output_file): + return output_file + return os.path.join(app.srcdir, output_file) def setup(app): """ @@ -824,6 +646,7 @@ def setup(app): Returns: dict: Extension metadata """ + # Ensure MysST NB is loaded before this extension so the glue domain is registered app.setup_extension('myst_nb') @@ -838,9 +661,6 @@ def setup(app): app.add_css_file('metadata_figure.css') app.connect("build-finished", copy_asset_files) - # Generate bib file after build if enabled - app.connect("build-finished", write_bib_file) - # Register event handler to check all figures after build app.connect('env-updated', check_all_figures_have_license) @@ -911,4 +731,3 @@ def add_unnumbered_caption(app, doctree, fromdocname): # add an empty caption so that metadata can be appended new_caption = nodes.caption(text="") node += new_caption - \ No newline at end of file From dcd5cad7aaaa5480ee4095432607e934932cde6f Mon Sep 17 00:00:00 2001 From: Dennis den Ouden-van der Horst Date: Fri, 19 Dec 2025 16:38:09 +0100 Subject: [PATCH 3/4] Move README.md include after BibTeX example Relocated the inclusion of README.md content to follow the BibTeX example section instead of preceding it. This improves the logical flow of the manual by keeping related content together. --- MANUAL.ipynb | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/MANUAL.ipynb b/MANUAL.ipynb index d36773a..5f6296a 100644 --- a/MANUAL.ipynb +++ b/MANUAL.ipynb @@ -415,12 +415,6 @@ "The logo of TeachBooks.\n", "```\n", "\n", - "\n", - "::::{include} README.md\n", - ":start-after: \"\"\n", - "::::\n", - "\n", - "\n", "### Example 11: Metadata with BibTeX extraction\n", "\n", "````md\n", @@ -443,7 +437,7 @@ "\n", "The corresponding BibTeX entry in a `.bib` file would be:\n", "\n", - "````{bibtex}\n", + "````bibtex\n", "@misc{TeachBooksLogo,\n", " author = {Veronica Comin},\n", " title = {The logo of TeachBooks.},\n", @@ -454,7 +448,11 @@ " howpublished = {\\url{https://github.com/TeachBooks/logos_and_visualisations}},\n", " copyright = {© TeachBooks 2024}\n", "}\n", - "````" + "````\n", + "\n", + "::::{include} README.md\n", + ":start-after: \"\"\n", + "::::" ] }, { From 692d857f18ce978d6808942a0fef42304b51bffa Mon Sep 17 00:00:00 2001 From: Tom van Woudenberg Date: Mon, 22 Dec 2025 07:36:14 +0100 Subject: [PATCH 4/4] Add placement metadata to TeachBooks logo --- MANUAL.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/MANUAL.ipynb b/MANUAL.ipynb index 5f6296a..b4f2c97 100644 --- a/MANUAL.ipynb +++ b/MANUAL.ipynb @@ -431,6 +431,7 @@ ":name: tb_logo_metadata11\n", ":width: 50%\n", ":bib: TeachBooksLogo\n", + ":placement: caption\n", "\n", "The logo of TeachBooks.\n", "```\n",