Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion MANUAL.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -415,11 +415,52 @@
"The logo of TeachBooks.\n",
"```\n",
"\n",
"### Example 11: Metadata with BibTeX extraction\n",
"\n",
"````md\n",
"```{figure} /images/TeachBooks_logo.svg\n",
":name: tb_logo_metadata11\n",
":width: 50%\n",
":bib: TeachBooksLogo\n",
"\n",
"The logo of TeachBooks.\n",
"```\n",
"````\n",
"\n",
"```{figure} /images/TeachBooks_logo.svg\n",
":name: tb_logo_metadata11\n",
":width: 50%\n",
":bib: TeachBooksLogo\n",
":placement: caption\n",
"\n",
"The logo of TeachBooks.\n",
"```\n",
"\n",
"The corresponding BibTeX entry in a `.bib` file would be:\n",
"\n",
"````bibtex\n",
"@misc{TeachBooksLogo,\n",
" author = {Veronica Comin},\n",
" title = {The logo of TeachBooks.},\n",
" year = {2024},\n",
" date = {2024-11-13},\n",
" note = {License: CC-BY},\n",
" url = {https://github.com/TeachBooks/logos_and_visualisations},\n",
" howpublished = {\\url{https://github.com/TeachBooks/logos_and_visualisations}},\n",
" copyright = {© TeachBooks 2024}\n",
"}\n",
"````\n",
"\n",
"::::{include} README.md\n",
":start-after: \"<!-- Start contribute -->\"\n",
"::::\n"
"::::"
]
},
{
"cell_type": "markdown",
"id": "8493ba3e",
"metadata": {},
"source": []
}
],
"metadata": {
Expand Down
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ sphinx:
default_copyright: authoryear
source:
warn_missing: false
bib:
extract_metadata: true
```

Each of the level 1 keys in `metadata_figure_settings` must be a dictionary of key-value pairs. Each level 1 key is discussed below, including its options.
Expand Down Expand Up @@ -136,6 +138,13 @@ The `copyright` key contains options for how to handle copyright metadata.
The `source` key contains options for how to handle source metadata.
- `warn_missing`: If `true`, a warning will be generated for each figure without source information.

### Bib

The `bib` key contains options for BibTeX entry support. This allows you to extract figure metadata from existing BibTeX entries.

Configuration options:
- `extract_metadata`: If `true`, metadata will be extracted from existing BibTeX entries when the `:bib:` option references a valid key. Default: `true`.

## Usage

The figure directive and the [MyST-NB sphinx extension's `glue:figure` directive](https://myst-nb.readthedocs.io/en/latest/render/glue.html#the-glue-figure-directive) are extended with the following options to add metadata:
Expand Down Expand Up @@ -171,6 +180,10 @@ The figure directive and the [MyST-NB sphinx extension's `glue:figure` directive
- `admonition_class`:
- Optionally override the global `admonition_class` setting for this figure only.
- Only relevant if `placement` is `admonition` or `margin`.
- `bib`:
- Optionally specify a BibTeX key for this figure.
- When specified with an existing key in your `.bib` files, metadata (author, date, source, license, copyright) will be extracted from the bib entry.
- Explicit metadata options (`:author:`, `:license:`, etc.) take precedence over extracted bib metadata.

## Documentation

Expand Down
151 changes: 139 additions & 12 deletions src/sphinx_metadata_figure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@

from sphinx.writers.html import HTMLTranslator

from docutils import nodes

from sphinx.locale import get_translation
MESSAGE_CATALOG_NAME = "sphinx_metadata_figure"
translate = get_translation(MESSAGE_CATALOG_NAME)
Expand Down Expand Up @@ -63,13 +61,17 @@
METADATA_FIGURE_DEFAULTS_SOURCE = {
'warn_missing' : False
}
# Defaults for the ``bib`` settings group (BibTeX support for figures).
METADATA_FIGURE_DEFAULTS_BIB = {
    'extract_metadata': True, # Extract metadata from bib entries when :bib: is specified
}
METADATA_FIGURE_DEFAULTS = {
'style': METADATA_FIGURE_DEFAULTS_STYLE,
'license': METADATA_FIGURE_DEFAULTS_LICENSE,
'author': METADATA_FIGURE_DEFAULTS_AUTHOR,
'date': METADATA_FIGURE_DEFAULTS_DATE,
'copyright': METADATA_FIGURE_DEFAULTS_COPYRIGHT,
'source': METADATA_FIGURE_DEFAULTS_SOURCE
'source': METADATA_FIGURE_DEFAULTS_SOURCE,
'bib': METADATA_FIGURE_DEFAULTS_BIB,
}

# List of valid licenses
Expand Down Expand Up @@ -112,6 +114,107 @@
'Pexels License': 'https://www.pexels.com/license/',
}

def _find_closing_brace(text, open_pos):
    """Return the index of the ``}`` that closes the ``{`` at ``open_pos``.

    Uses a plain depth counter, so it assumes the braces in ``text`` are
    balanced. Returns -1 when the brace is never closed.
    """
    depth = 0
    for idx in range(open_pos, len(text)):
        char = text[idx]
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return idx
    return -1


def _find_closing_quote(text, open_pos):
    """Return the index of the ``"`` that closes the one at ``open_pos``.

    A quote that sits inside braces does not terminate the value.
    Returns -1 when no closing quote is found.
    """
    depth = 0
    for idx in range(open_pos + 1, len(text)):
        char = text[idx]
        if char == '{':
            depth += 1
        elif char == '}':
            depth = max(depth - 1, 0)
        elif char == '"' and depth == 0:
            return idx
    return -1


def _iter_bib_fields(entry_body):
    """Yield ``(lowercased field name, raw value)`` pairs from an entry body.

    Supports ``field = {value}`` (arbitrarily nested braces),
    ``field = "value"`` and bare numeric values such as ``year = 2024``.
    Other bare tokens (e.g. string macros) are skipped.
    """
    import re

    name_pattern = re.compile(r'(\w+)\s*=\s*')
    bare_number = re.compile(r'\d+')
    pos = 0
    while True:
        name_match = name_pattern.search(entry_body, pos)
        if name_match is None:
            return
        field_name = name_match.group(1).lower()
        value_start = name_match.end()
        # Always advance past the field name so the loop is guaranteed to end.
        pos = value_start
        opener = entry_body[value_start:value_start + 1]
        if opener == '{':
            value_end = _find_closing_brace(entry_body, value_start)
        elif opener == '"':
            value_end = _find_closing_quote(entry_body, value_start)
        else:
            number = bare_number.match(entry_body, value_start)
            if number:
                pos = number.end()
                yield field_name, number.group(0)
            continue
        if value_end < 0:
            # Unterminated value: ignore this field and keep scanning.
            continue
        # Resume after the value so that text inside it is never
        # mistaken for another field.
        pos = value_end + 1
        yield field_name, entry_body[value_start + 1:value_end]


def _parse_bib_entry(bib_content, key):
    """
    Parse a BibTeX entry and extract metadata fields.

    The entry is located by its key (case-insensitively) and its body is
    delimited by brace matching, so field values may contain ``@`` and the
    entry may be followed by comments or other text. Fields with an empty
    value are ignored.

    Recognised fields and the metadata key they fill:
        author -> 'author', title -> 'title', copyright -> 'copyright',
        date -> 'date', year -> 'date' as ``<year>-01-01`` (only when no
        date has been seen yet), url -> 'source', howpublished -> 'source'
        (the target of ``\\url{...}`` if present, otherwise the raw value
        when no source has been seen yet), note -> 'license' (text after
        ``License:``).

    Args:
        bib_content: The full content of a .bib file
        key: The BibTeX key to look up

    Returns:
        dict: Extracted metadata or None if not found
    """
    import re

    if not bib_content or not key:
        return None

    # Pattern matches the entry header: @type{key,
    header_pattern = re.compile(
        rf'@\w+\s*(\{{)\s*{re.escape(key)}\s*,', re.IGNORECASE
    )
    entry_content = None
    for header in header_pattern.finditer(bib_content):
        closing = _find_closing_brace(bib_content, header.start(1))
        if closing >= 0:
            entry_content = bib_content[header.end():closing]
            break
    if entry_content is None:
        return None

    metadata = {}
    for field_name, raw_value in _iter_bib_fields(entry_content):
        field_value = raw_value.strip()
        if not field_value:
            # An empty field carries no information; skipping it avoids
            # storing None or building dates such as 'None-01-01'.
            continue

        if field_name in ('author', 'title', 'copyright'):
            metadata[field_name] = field_value
        elif field_name == 'year':
            # Convert year to date format, unless a full date is known
            if 'date' not in metadata:
                metadata['date'] = f'{field_value}-01-01'
        elif field_name == 'date':
            metadata['date'] = field_value
        elif field_name == 'url':
            metadata['source'] = field_value
        elif field_name == 'howpublished':
            # Extract URL from \url{...} if present
            url_match = re.search(r'\\url\{([^}]+)\}', field_value)
            if url_match:
                metadata['source'] = url_match.group(1)
            elif 'source' not in metadata:
                metadata['source'] = field_value
        elif field_name == 'note':
            # Try to extract license from note field
            license_match = re.search(r'License:\s*(.+)', field_value, re.IGNORECASE)
            if license_match:
                metadata['license'] = license_match.group(1).strip()

    return metadata if metadata else None


def _read_bib_file(bib_path):
    """Return the UTF-8 text of ``bib_path``, or None when it cannot be read."""
    try:
        with open(bib_path, 'r', encoding='utf-8') as f:
            return f.read()
    except (OSError, UnicodeDecodeError) as e:
        logger.debug(f'Could not read bib file {bib_path}: {e}')
        return None


def _load_bib_files(app):
    """
    Load the .bib files configured via ``bibtex_bibfiles``, falling back to
    every ``.bib`` file found under the source directory.

    Configured paths are resolved against ``app.srcdir``; entries that do not
    point to an existing file are skipped. The fallback search only runs when
    no configured file could be read, and visits directories and files in
    sorted order so the combined content is the same on every run.

    Args:
        app: Object exposing ``srcdir`` and ``config`` (the Sphinx application)

    Returns:
        str: Combined content of all bib files, each followed by a newline
    """
    srcdir = app.srcdir
    chunks = []

    # Try to get bib files from the bibtex_bibfiles configuration value.
    # The value may be missing or None, so normalise it to a list.
    configured = getattr(app.config, 'bibtex_bibfiles', None) or []
    if isinstance(configured, str):
        configured = [configured]

    for bib_file in configured:
        bib_path = os.path.join(srcdir, bib_file)
        if os.path.isfile(bib_path):
            content = _read_bib_file(bib_path)
            if content is not None:
                chunks.append(content)

    # Search for any .bib files in source directory if none could be loaded
    if not chunks:
        for root, dirs, files in os.walk(srcdir):
            # Sorting in place fixes the traversal order of os.walk.
            dirs.sort()
            for file_name in sorted(files):
                if not file_name.endswith('.bib'):
                    continue
                content = _read_bib_file(os.path.join(root, file_name))
                if content is not None:
                    chunks.append(content)

    return ''.join(chunk + '\n' for chunk in chunks)


class MetadataFigure(Figure):
"""
Enhanced figure directive with metadata support.
Expand All @@ -136,6 +239,8 @@ class MetadataFigure(Figure):
'show': directives.unchanged, # comma-separated: author,license,date
'admonition_title': directives.unchanged, # admonition title (default: Attribution)
'admonition_class': directives.unchanged, # extra classes for admonition
# Bib entry support
'bib': directives.unchanged, # BibTeX key to use/generate for this figure
})

def run(self):
Expand All @@ -156,8 +261,22 @@ def run(self):
for key in METADATA_FIGURE_DEFAULTS:
settings[key] = METADATA_FIGURE_DEFAULTS[key] | user_settings.get(key, {})

# Validate license
license_value = self.options.get('license', None)
# Handle bib entry extraction - extract metadata from bib entry if :bib: is specified
bib_key = self.options.get('bib', None)
bib_settings = settings['bib']
bib_metadata = {}

# Check if an existing bibtex key is given
if bib_key and bib_settings['extract_metadata'] and env:
# Load bib files and try to extract metadata
bib_content = _load_bib_files(env.app)
if bib_content:
extracted = _parse_bib_entry(bib_content, bib_key)
if extracted:
bib_metadata = extracted

# Validate license (explicit option > bib metadata > defaults)
license_value = self.options.get('license', None) or bib_metadata.get('license', None)
license_settings = settings['license']
if not license_value:
if license_settings['substitute_missing']:
Expand Down Expand Up @@ -193,8 +312,8 @@ def run(self):
location=(self.state.document.current_source, self.lineno)
)

# Validate date format (optional)
date_value = self.options.get('date',None)
# Validate date format (explicit option > bib metadata > defaults)
date_value = self.options.get('date', None) or bib_metadata.get('date', None)
if not date_value:
date_settings = settings['date']
if date_settings['substitute_missing']:
Expand All @@ -214,7 +333,8 @@ def run(self):
location=(self.state.document.current_source, self.lineno)
)

author_value = self.options.get('author',None)
# Author value (explicit option > bib metadata > defaults)
author_value = self.options.get('author', None) or bib_metadata.get('author', None)
if not author_value:
author_settings = settings['author']
if author_settings['substitute_missing']:
Expand All @@ -224,7 +344,8 @@ def run(self):
else:
author_value = default_author

copyright_value = self.options.get('copyright', None)
# Copyright value (explicit option > bib metadata > defaults)
copyright_value = self.options.get('copyright', None) or bib_metadata.get('copyright', None)
if not copyright_value:
copyright_settings = settings['copyright']
if copyright_settings['substitute_missing']:
Expand Down Expand Up @@ -267,7 +388,8 @@ def run(self):
else:
copyright_value = default_copyright

source_value = self.options.get('source', None)
# Source value (explicit option > bib metadata)
source_value = self.options.get('source', None) or bib_metadata.get('source', None)
source_settings = settings['source']
if source_value is None:
if source_settings['warn_missing']:
Expand Down Expand Up @@ -506,6 +628,11 @@ def check_all_figures_have_license(app, env):
for docname, image_uri in unrecognized_licenses:
logger.warning(f' - {docname}: {image_uri}')

def _resolve_bib_output_path(app, output_file: str) -> str:
    """Return ``output_file`` as-is when absolute, else joined onto ``app.srcdir``."""
    already_absolute = os.path.isabs(output_file)
    # app.srcdir is only consulted for relative paths.
    return output_file if already_absolute else os.path.join(app.srcdir, output_file)

def setup(app):
"""
Expand All @@ -519,6 +646,7 @@ def setup(app):
Returns:
dict: Extension metadata
"""

# Ensure MyST-NB is loaded before this extension so the glue domain is registered
app.setup_extension('myst_nb')

Expand All @@ -532,7 +660,7 @@ def setup(app):
# Add custom CSS for metadata styling
app.add_css_file('metadata_figure.css')
app.connect("build-finished", copy_asset_files)

# Register event handler to check all figures after build
app.connect('env-updated', check_all_figures_have_license)

Expand Down Expand Up @@ -603,4 +731,3 @@ def add_unnumbered_caption(app, doctree, fromdocname):
# add an empty caption so that metadata can be appended
new_caption = nodes.caption(text="")
node += new_caption