Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,6 @@ nosetests.xml
.venv

.idea
src/scielo-scholarly-data

# Source packages
src/
120 changes: 119 additions & 1 deletion packtools/sps/validation/graphic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,123 @@
import os
from packtools.sps.validation.visual_resource_base import VisualResourceBaseValidation
from packtools.sps.validation.utils import build_response
from packtools.sps.models.graphic import XmlGraphic


class GraphicValidation(VisualResourceBaseValidation):
...
"""
Validation class for <graphic> and <inline-graphic> elements according to SPS 1.10.

Validates:
- @id attribute (required for both <graphic> and <inline-graphic>)
- @xlink:href attribute (required)
- File extensions (.jpg, .jpeg, .png, .tif, .tiff, .svg)
- .svg only allowed inside <alternatives>

Note: Accessibility validation (<alt-text>, <long-desc>) is handled separately
by XMLAccessibilityDataValidation in the validation pipeline to avoid duplicates.
"""

def validate(self):
    """
    Execute all validations for graphic/inline-graphic elements.

    Results are produced in this order: the @id check, the @xlink:href
    check, then any SVG-placement results.

    When @xlink:href is missing or empty, a failing "exist" result is
    yielded here and the inherited ``validate_xlink_href()`` is not called.
    NOTE(review): the inherited method is reported to pass the value
    straight to ``os.path.splitext``, which raises ``TypeError`` on ``None``
    and would abort the whole pipeline; the base class is not visible from
    this file, so confirm before removing this guard.

    Yields:
        dict: Validation response
    """
    yield self.validate_id()

    xlink_href = self.data.get("xlink_href")
    if xlink_href:
        # A value exists, so the inherited extension check can run safely.
        yield self.validate_xlink_href()
    else:
        xml = self.data.get("xml")
        yield build_response(
            title="@xlink:href",
            parent=self.data,
            item=self.data.get("tag"),
            sub_item="xlink_href",
            is_valid=False,
            validation_type="exist",
            expected="xlink:href",
            obtained=xlink_href,
            advice=f'Add xlink:href="" to {xml}',
            error_level=self.params.get("xlink_href_error_level", "ERROR"),
            data=self.data,
        )

    yield from self.validate_svg_in_alternatives()
Comment on lines +21 to +25
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GraphicValidation.validate() calls the inherited validate_xlink_href(), but VisualResourceBaseValidation.validate_xlink_href() uses os.path.splitext(xlink_href) without guarding for missing @xlink:href. If xlink_href is None (attribute absent), this will raise TypeError and break the whole validation pipeline instead of reporting a CRITICAL/ERROR result. Consider overriding validate_xlink_href() in GraphicValidation (or fixing the base method) to (1) validate presence of @xlink:href and (2) only run extension validation when a value exists.

Copilot uses AI. Check for mistakes.
# Note: Accessibility validation is handled by the dedicated XMLAccessibilityDataValidation
# in the pipeline to avoid duplicate validation entries in reports

def validate_id(self):
    """
    Validate that @id is present in <graphic> and <inline-graphic>.

    Per SPS 1.10 specification, @id is required for both <graphic> and
    <inline-graphic> elements. This overrides the base class behavior which
    exempts inline-* elements.

    Returns:
        dict: Validation response. ``expected`` always describes the
        requirement, whether the check passes or fails, so that OK results
        do not report an expected value of ``None``.
    """
    xml = self.data.get("xml")
    tag = self.data.get("tag")
    id_value = self.data.get("id")

    valid = bool(id_value)

    # Keep only the opening tag (up to the first ">") for short messages.
    # If there is no ">" the slice is empty and the fallback below is used.
    elem = xml[:xml.find(">") + 1] if xml else None

    # Stable description of the requirement, independent of validity.
    expected = f"id for {elem}" if elem else f"id for <{tag}>"

    return build_response(
        title="@id",
        parent=self.data,
        item=tag,
        sub_item=None,
        is_valid=valid,
        validation_type="exist",
        expected=expected,
        obtained=id_value,
        advice=f'Add id="" to {xml}' if not valid else None,
        error_level=self.params["media_attributes_error_level"],
        data=self.data,
    )

def validate_svg_in_alternatives(self):
    """
    Check that an .svg file is referenced only from inside <alternatives>.

    SPS 1.10 rule:
    - .svg is accepted only when the graphic's parent is <alternatives>
    - .jpg, .jpeg, .png, .tif and .tiff are not restricted by this check

    Nothing is yielded when @xlink:href is empty or when the file
    extension is not .svg.

    Yields:
        dict: Validation response
    """
    href = self.data.get("xlink_href")
    container = self.data.get("parent_tag")

    if not href:
        return

    # The extension comparison is case-insensitive.
    if os.path.splitext(href)[1].lower() != ".svg":
        return

    inside_alternatives = container == "alternatives"

    if inside_alternatives:
        advice = None
    else:
        advice = (
            f"SVG files are only allowed inside <alternatives>. "
            f"The file '{href}' is currently in <{container}>. "
            f"Either move this <graphic> inside <alternatives> or use a different format (.jpg, .png, .tif)."
        )

    tag = self.data.get("tag")
    yield build_response(
        title="SVG in alternatives",
        parent=self.data,
        item=tag,
        sub_item="xlink_href",
        is_valid=inside_alternatives,
        validation_type="format",
        expected="<graphic> with .svg extension inside <alternatives>",
        obtained=f"{tag} with .svg inside <{container}>",
        advice=advice,
        error_level=self.params.get("svg_error_level", "ERROR"),
        data=self.data,
    )


class XMLGraphicValidation:
    """
    Document-level validator for <graphic> and <inline-graphic> elements.

    Mirrors the structure of XMLMediaValidation and
    XMLAccessibilityDataValidation: every graphic entry exposed by
    ``XmlGraphic(xmltree).data`` is handed to its own GraphicValidation.
    """

    def __init__(self, xmltree, params):
        # Rules are kept as given and passed to each per-element validator.
        self.params = params
        self.xml_graphic = XmlGraphic(xmltree)

    def validate(self):
        """
        Validate each graphic and inline-graphic element of the document.

        Yields the validation results of every element, in document data
        order, one element after another.
        """
        for graphic_data in self.xml_graphic.data:
            yield from GraphicValidation(graphic_data, self.params).validate()
17 changes: 17 additions & 0 deletions packtools/sps/validation/xml_validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

from packtools.sps.validation.supplementary_material import XmlSupplementaryMaterialValidation
from packtools.sps.validation.ext_link import ExtLinkValidation
from packtools.sps.validation.graphic import XMLGraphicValidation


def validate_affiliations(xmltree, params):
Expand Down Expand Up @@ -325,3 +326,19 @@ def validate_ext_links(xmltree, params):
yield from validator.validate_ext_link_type_value()
yield from validator.validate_descriptive_text()
yield from validator.validate_xlink_title_when_generic()


def validate_graphics(xmltree, params):
    """
    Run the <graphic> and <inline-graphic> validations (SPS 1.10) on a document.

    Checks covered by this group:
    - @id attribute (required for both <graphic> and <inline-graphic>)
    - @xlink:href attribute (required)
    - File extensions (.jpg, .jpeg, .png, .tif, .tiff, .svg)
    - .svg only allowed inside <alternatives>

    Accessibility elements (<alt-text>, <long-desc>) are not validated
    here; they are handled by XMLAccessibilityDataValidation.

    The rules are read from ``params["graphic_rules"]``.
    """
    rules = params["graphic_rules"]
    yield from XMLGraphicValidation(xmltree, rules).validate()
4 changes: 4 additions & 0 deletions packtools/sps/validation/xml_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,7 @@ def validate_xml_content(xmltree, rules):
"group": "ext-link",
"items": xml_validations.validate_ext_links(xmltree, params),
}
yield {
"group": "graphic",
"items": xml_validations.validate_graphics(xmltree, params),
}
26 changes: 26 additions & 0 deletions packtools/sps/validation_rules/graphic_rules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"graphic_rules": {
"media_attributes_error_level": "CRITICAL",
"xlink_href_error_level": "ERROR",
"valid_extension": ["jpg", "jpeg", "png", "tif", "tiff", "svg"],
"svg_error_level": "ERROR",
"alt_text_exist_error_level": "WARNING",
"alt_text_content_error_level": "CRITICAL",
"alt_text_media_restriction_error_level": "ERROR",
"alt_text_duplication_error_level": "WARNING",
"decorative_alt_text_error_level": "INFO",
"long_desc_exist_error_level": "WARNING",
"long_desc_content_error_level": "CRITICAL",
"long_desc_minimum_length_error_level": "ERROR",
"long_desc_media_restriction_error_level": "ERROR",
"long_desc_duplication_error_level": "WARNING",
"long_desc_occurrence_error_level": "ERROR",
"long_desc_null_incompatibility_error_level": "WARNING",
"xref_transcript_error_level": "WARNING",
"transcript_error_level": "WARNING",
"content_type_error_level": "CRITICAL",
"speaker_speech_error_level": "WARNING",
"structure_error_level": "CRITICAL",
"content_types": ["machine-generated"]
Comment on lines +2 to +24
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

graphic_rules.json includes many accessibility-related parameters (alt_text_*, long_desc_*, transcript/content-type, etc.), but GraphicValidation no longer performs accessibility validation and the pipeline already has accessibility_data_rules.json for that. Keeping duplicated knobs here risks the two rule sets diverging and makes it unclear which settings are actually used; consider trimming graphic_rules to only the keys consumed by the graphic validator (id/xlink/extension/svg).

Copilot uses AI. Check for mistakes.
}
}
Loading
Loading