diff --git a/packtools/sps/formats/pdf/assets/CROSSMARK_Color_horizontal.png b/packtools/sps/formats/pdf/assets/CROSSMARK_Color_horizontal.png
new file mode 100644
index 000000000..f15d999fb
Binary files /dev/null and b/packtools/sps/formats/pdf/assets/CROSSMARK_Color_horizontal.png differ
diff --git a/packtools/sps/formats/pdf/crossmark.py b/packtools/sps/formats/pdf/crossmark.py
new file mode 100644
index 000000000..efc4096cd
--- /dev/null
+++ b/packtools/sps/formats/pdf/crossmark.py
@@ -0,0 +1,478 @@
+"""
+crossmark.py - Add Crossmark button to PDF files.
+
+This module provides functionality to insert the Crossmark logo and hyperlink
+into PDF files, along with relevant XMP metadata, as required by Crossref.
+
+Reference: https://www.crossref.org/documentation/crossmark/
+"""
+
+import argparse
+import csv
+import io
+import os
+from xml.etree import ElementTree
+
+from pypdf import PdfReader, PdfWriter, generic
+from reportlab.pdfgen import canvas as rl_canvas
+
+try:
+ from PIL import Image as PILImage
+ _PIL_AVAILABLE = True
+except ImportError:
+ _PIL_AVAILABLE = False
+
+
+# Default logo bundled with the package
+_ASSETS_DIR = os.path.join(os.path.dirname(__file__), "assets")
+_DEFAULT_LOGO = os.path.join(_ASSETS_DIR, "CROSSMARK_Color_horizontal.png")
+
+# Crossmark dialog URL template
+_CROSSMARK_URL = (
+ "https://crossmark.crossref.org/dialog"
+ "?doi={doi}&domain=pdf&date_stamp={date_stamp}"
+)
+
+# XMP namespaces used for Crossmark metadata
+_XMP_NAMESPACES = {
+ "dc": "http://purl.org/dc/elements/1.1/",
+ "prism": "http://prismstandard.org/namespaces/basic/2.0/",
+ "crossmark": "http://crossref.org/crossmark/1.0/",
+ "pdfx": "http://ns.adobe.com/pdfx/1.3/",
+}
+
+# Mapping: XMP field -> value template key
+_XMP_FIELDS = {
+ "dc:identifier": "doi:{doi}",
+ "prism:doi": "{doi}",
+ "prism:url": "https://doi.org/{doi}",
+ "crossmark:MajorVersionDate": "{date_stamp}",
+ "crossmark:DOI": "{doi}",
+ "pdfx:doi": "{doi}",
+ "pdfx:CrossmarkMajorVersionDate": "{date_stamp}",
+}
+
+
+def _compute_logo_rect(page_width, page_height, position, logo_width, logo_height,
+ margin=20):
+ """
+ Compute the (x, y) lower-left position for the logo given position name.
+
+ PDF coordinate system has origin at bottom-left.
+
+ Args:
+ page_width (float): Width of the page in points.
+ page_height (float): Height of the page in points.
+ position (str): One of 'top-right', 'top-left', 'bottom-right', 'bottom-left'.
+ logo_width (float): Width of the logo in points.
+ logo_height (float): Height of the logo in points.
+ margin (int): Margin from page edge in points.
+
+ Returns:
+ tuple: (x, y) lower-left coordinates for the logo.
+ """
+ if position == "top-right":
+ x = page_width - logo_width - margin
+ y = page_height - logo_height - margin
+ elif position == "top-left":
+ x = margin
+ y = page_height - logo_height - margin
+ elif position == "bottom-right":
+ x = page_width - logo_width - margin
+ y = margin
+ elif position == "bottom-left":
+ x = margin
+ y = margin
+ else:
+ # Default to top-right
+ x = page_width - logo_width - margin
+ y = page_height - logo_height - margin
+ return x, y
+
+
+def _get_logo_height(logo_path, logo_width):
+ """
+ Calculate logo height maintaining aspect ratio.
+
+ Args:
+ logo_path (str): Path to the logo image file.
+ logo_width (int): Desired width in points.
+
+ Returns:
+ float: Calculated height in points.
+ """
+ if _PIL_AVAILABLE:
+ try:
+ with PILImage.open(logo_path) as img:
+ orig_w, orig_h = img.size
+ return logo_width * orig_h / orig_w
+ except Exception:
+ pass
+ # Fallback: assume typical horizontal logo aspect ratio ~4:1
+ return logo_width / 4.0
+
+
+def _build_xmp_packet(doi, date_stamp):
+ """
+ Build an XMP metadata packet string for Crossmark.
+
+ Args:
+ doi (str): DOI of the article (e.g., '10.1590/s0100-12345').
+ date_stamp (str): Date of the major version (e.g., '2026-01-15').
+
+ Returns:
+ bytes: UTF-8 encoded XMP packet.
+ """
+ fields_xml = "\n".join(
+ f" <{field}>{value.format(doi=doi, date_stamp=date_stamp)}{field}>"
+ for field, value in _XMP_FIELDS.items()
+ )
+
+ ns_attrs = "\n ".join(
+ f'xmlns:{prefix}="{uri}"'
+ for prefix, uri in _XMP_NAMESPACES.items()
+ )
+
+ xmp = (
+ "\n"
+ "\n"
+ " \n"
+ " \n"
+ f"{fields_xml}\n"
+ " \n"
+ " \n"
+ "\n"
+ ""
+ )
+ return xmp.encode("utf-8")
+
+
+def _merge_xmp_packet(existing_xmp_bytes, doi, date_stamp):
+ """
+ Merge Crossmark fields into an existing XMP packet, or create a new one.
+
+ When existing XMP is present, the Crossmark fields are added to the
+ existing rdf:Description block. If a field already exists it is updated.
+
+ Args:
+ existing_xmp_bytes (bytes | None): Existing XMP packet bytes, or None.
+ doi (str): DOI of the article.
+ date_stamp (str): Date stamp string.
+
+ Returns:
+ bytes: Updated XMP packet bytes.
+ """
+ if not existing_xmp_bytes:
+ return _build_xmp_packet(doi, date_stamp)
+
+ # Register namespaces to avoid ns0 mangling
+ for prefix, uri in _XMP_NAMESPACES.items():
+ ElementTree.register_namespace(prefix, uri)
+ ElementTree.register_namespace("x", "adobe:ns:meta/")
+ ElementTree.register_namespace(
+ "rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ )
+
+ try:
+ root = ElementTree.fromstring(existing_xmp_bytes)
+ except ElementTree.ParseError:
+ return _build_xmp_packet(doi, date_stamp)
+
+ rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ desc_tag = f"{{{rdf_ns}}}Description"
+ desc = root.find(f".//{desc_tag}")
+ if desc is None:
+ return _build_xmp_packet(doi, date_stamp)
+
+ new_values = {
+ field: value.format(doi=doi, date_stamp=date_stamp)
+ for field, value in _XMP_FIELDS.items()
+ }
+
+ for prefixed_field, text in new_values.items():
+ prefix, local = prefixed_field.split(":", 1)
+ ns_uri = _XMP_NAMESPACES[prefix]
+ clark = f"{{{ns_uri}}}{local}"
+ elem = desc.find(clark)
+ if elem is None:
+ elem = ElementTree.SubElement(desc, clark)
+ elem.text = text
+
+ xmp_str = ElementTree.tostring(root, encoding="unicode", xml_declaration=False)
+ return (
+ "\n"
+ + xmp_str
+ + "\n"
+ ).encode("utf-8")
+
+
+def _create_logo_overlay(page_width, page_height, x, y, logo_width, logo_height,
+ logo_path):
+ """
+ Create a single-page PDF overlay containing the Crossmark logo.
+
+ Args:
+ page_width (float): Width of the page in points.
+ page_height (float): Height of the page in points.
+ x (float): Left coordinate of the logo.
+ y (float): Bottom coordinate of the logo.
+ logo_width (float): Width of the logo in points.
+ logo_height (float): Height of the logo in points.
+ logo_path (str): Path to the logo image file.
+
+ Returns:
+ io.BytesIO: Buffer containing the overlay PDF.
+ """
+ buf = io.BytesIO()
+ c = rl_canvas.Canvas(buf, pagesize=(page_width, page_height))
+ c.drawImage(
+ logo_path,
+ x, y,
+ width=logo_width,
+ height=logo_height,
+ mask="auto",
+ preserveAspectRatio=True,
+ )
+ c.save()
+ buf.seek(0)
+ return buf
+
+
+def add_crossmark(
+ input_pdf,
+ output_pdf,
+ doi,
+ date_stamp,
+ logo_path=None,
+ position="top-right",
+ width=150,
+):
+ """
+ Insert the Crossmark logo with hyperlink and XMP metadata into a PDF.
+
+ The logo is placed on the first page only. All other pages are left
+ unchanged. Existing content is preserved.
+
+ Args:
+ input_pdf (str): Path to the input PDF file.
+ output_pdf (str): Path for the output PDF file.
+ doi (str): DOI of the article (e.g., '10.1590/s0100-12345').
+ date_stamp (str): Date of the major version in YYYY-MM-DD format.
+ logo_path (str | None): Path to the Crossmark logo PNG/JPEG image.
+ Defaults to the bundled ``CROSSMARK_Color_horizontal.png``.
+ position (str): Logo position on the first page. One of
+ ``'top-right'``, ``'top-left'``, ``'bottom-right'``,
+ ``'bottom-left'``. Defaults to ``'top-right'``.
+ width (int): Desired logo width in points (1 pt ≈ 1/72 inch).
+ Height is calculated automatically to preserve aspect ratio.
+ Defaults to 150.
+
+ Returns:
+ None
+
+ Raises:
+ FileNotFoundError: If ``input_pdf`` or ``logo_path`` does not exist.
+ ValueError: If ``doi`` or ``date_stamp`` is empty.
+ """
+ if not doi:
+ raise ValueError("doi must not be empty")
+ if not date_stamp:
+ raise ValueError("date_stamp must not be empty")
+
+ if logo_path is None:
+ logo_path = _DEFAULT_LOGO
+
+ if not os.path.exists(input_pdf):
+ raise FileNotFoundError(f"Input PDF not found: {input_pdf}")
+ if not os.path.exists(logo_path):
+ raise FileNotFoundError(f"Logo file not found: {logo_path}")
+
+ reader = PdfReader(input_pdf)
+ writer = PdfWriter(clone_from=reader)
+
+ # --- Process first page: add logo overlay ---
+ first_page = writer.pages[0]
+ page_width = float(first_page.mediabox.width)
+ page_height = float(first_page.mediabox.height)
+
+ logo_height = _get_logo_height(logo_path, width)
+ x, y = _compute_logo_rect(page_width, page_height, position, width, logo_height)
+
+ overlay_buf = _create_logo_overlay(
+ page_width, page_height, x, y, width, logo_height, logo_path
+ )
+ overlay_page = PdfReader(overlay_buf).pages[0]
+ first_page.merge_page(overlay_page)
+
+ # --- Add URI annotation (clickable hyperlink) on first page ---
+ crossmark_url = _CROSSMARK_URL.format(doi=doi, date_stamp=date_stamp)
+ annotation_rect = [x, y, x + width, y + logo_height]
+ writer.add_uri(0, crossmark_url, annotation_rect)
+
+ # --- Update XMP metadata ---
+ existing_xmp = None
+ meta_ref = reader.root_object.get("/Metadata")
+ if meta_ref is not None:
+ try:
+ existing_xmp = meta_ref.get_object().get_data()
+ except Exception:
+ pass
+
+ xmp_bytes = _merge_xmp_packet(existing_xmp, doi, date_stamp)
+ xmp_stream = generic.DecodedStreamObject()
+ xmp_stream.set_data(xmp_bytes)
+ xmp_stream.update({
+ generic.NameObject("/Type"): generic.NameObject("/Metadata"),
+ generic.NameObject("/Subtype"): generic.NameObject("/XML"),
+ })
+ xmp_ref = writer._add_object(xmp_stream)
+ writer.root_object[generic.NameObject("/Metadata")] = xmp_ref
+
+ # --- Write output ---
+ output_dir = os.path.dirname(output_pdf)
+ if output_dir:
+ os.makedirs(output_dir, exist_ok=True)
+
+ with open(output_pdf, "wb") as f:
+ writer.write(f)
+
+
+def main():
+ """CLI entry point for adding Crossmark to PDF files."""
+ parser = argparse.ArgumentParser(
+ description=(
+ "Insert the Crossmark logo and metadata into a PDF file. "
+ "Supports single-file and batch (CSV) modes."
+ )
+ )
+
+ # Single-file mode arguments
+ parser.add_argument(
+ "--input",
+ help="Path to the input PDF file.",
+ )
+ parser.add_argument(
+ "--output",
+ help="Path for the output PDF file.",
+ )
+ parser.add_argument(
+ "--doi",
+ help="DOI of the article (e.g., 10.1590/s0100-12345).",
+ )
+ parser.add_argument(
+ "--date-stamp",
+ dest="date_stamp",
+ help="Date of the last major version in YYYY-MM-DD format.",
+ )
+
+ # Batch mode
+ parser.add_argument(
+ "--csv",
+ dest="csv_file",
+ help=(
+ "CSV file for batch processing. "
+ "Expected columns: doi, input_pdf, output_pdf, date_stamp. "
+ "(output_pdf is optional; if omitted, a suffix '_cm' is added.)"
+ ),
+ )
+
+ # Common options
+ parser.add_argument(
+ "--logo",
+ dest="logo_path",
+ default=None,
+ help=(
+ "Path to the Crossmark logo image. "
+ "Defaults to the bundled CROSSMARK_Color_horizontal.png."
+ ),
+ )
+ parser.add_argument(
+ "--position",
+ default="top-right",
+ choices=["top-right", "top-left", "bottom-right", "bottom-left"],
+ help="Position of the logo on the first page (default: top-right).",
+ )
+ parser.add_argument(
+ "--width",
+ type=int,
+ default=150,
+ help="Logo width in points (default: 150).",
+ )
+
+ args = parser.parse_args()
+
+ if args.csv_file:
+ # Batch mode
+ if not os.path.exists(args.csv_file):
+ parser.error(f"CSV file not found: {args.csv_file}")
+
+ with open(args.csv_file, newline="", encoding="utf-8") as f:
+ reader = csv.DictReader(f)
+ rows = list(reader)
+
+ for i, row in enumerate(rows, start=1):
+ doi = row.get("doi", "").strip()
+ input_pdf = row.get("input_pdf", "").strip()
+ date_stamp = row.get("date_stamp", "").strip()
+ output_pdf = row.get("output_pdf", "").strip()
+
+ if not doi or not input_pdf or not date_stamp:
+ print(
+ f"[Row {i}] Skipping: missing required fields "
+ f"(doi, input_pdf, date_stamp)."
+ )
+ continue
+
+ if not output_pdf:
+ base, ext = os.path.splitext(input_pdf)
+ output_pdf = f"{base}_cm{ext}"
+
+ try:
+ add_crossmark(
+ input_pdf=input_pdf,
+ output_pdf=output_pdf,
+ doi=doi,
+ date_stamp=date_stamp,
+ logo_path=args.logo_path,
+ position=args.position,
+ width=args.width,
+ )
+ print(f"[Row {i}] Created: {output_pdf}")
+ except Exception as exc:
+ print(f"[Row {i}] Error processing {input_pdf}: {exc}")
+
+ else:
+ # Single-file mode
+ missing = []
+ if not args.input:
+ missing.append("--input")
+ if not args.doi:
+ missing.append("--doi")
+ if not args.date_stamp:
+ missing.append("--date-stamp")
+ if missing:
+ parser.error(
+ f"The following arguments are required in single-file mode: "
+ + ", ".join(missing)
+ )
+
+ output_pdf = args.output
+ if not output_pdf:
+ base, ext = os.path.splitext(args.input)
+ output_pdf = f"{base}_cm{ext}"
+
+ add_crossmark(
+ input_pdf=args.input,
+ output_pdf=output_pdf,
+ doi=args.doi,
+ date_stamp=args.date_stamp,
+ logo_path=args.logo_path,
+ position=args.position,
+ width=args.width,
+ )
+ print(f"Created: {output_pdf}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/setup.py b/setup.py
index 880f6a9c8..4cb4687ce 100644
--- a/setup.py
+++ b/setup.py
@@ -24,6 +24,8 @@
'Pillow',
'openpyxl>=3.1.5',
'python-docx>=1.1.2',
+ 'pypdf>=3.0.0',
+ 'reportlab>=3.6.0',
]
@@ -85,6 +87,7 @@
"package_optimiser=packtools.package_optimiser:main",
"package_maker=packtools.package_maker:main",
"pdf_generator=packtools.sps.formats.pdf_generator:main",
+ "crossmark_pdf=packtools.sps.formats.pdf.crossmark:main",
]
}
)
diff --git a/tests/sps/formats/pdf/test_crossmark.py b/tests/sps/formats/pdf/test_crossmark.py
new file mode 100644
index 000000000..04b83ddec
--- /dev/null
+++ b/tests/sps/formats/pdf/test_crossmark.py
@@ -0,0 +1,506 @@
+"""
+Tests for packtools.sps.formats.pdf.crossmark
+"""
+
+import io
+import os
+import csv
+import tempfile
+import unittest
+from unittest.mock import patch, MagicMock
+
+from pypdf import PdfReader, PdfWriter
+from reportlab.pdfgen import canvas as rl_canvas
+from reportlab.lib.pagesizes import A4, letter
+
+from packtools.sps.formats.pdf.crossmark import (
+ _compute_logo_rect,
+ _get_logo_height,
+ _build_xmp_packet,
+ _merge_xmp_packet,
+ _create_logo_overlay,
+ add_crossmark,
+ _DEFAULT_LOGO,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_simple_pdf(pagesize=letter, num_pages=1):
+ """Return a BytesIO containing a minimal PDF with `num_pages` pages."""
+ buf = io.BytesIO()
+ c = rl_canvas.Canvas(buf, pagesize=pagesize)
+ for i in range(num_pages):
+ c.drawString(72, 500, f"Page {i + 1}")
+ if i < num_pages - 1:
+ c.showPage()
+ c.save()
+ buf.seek(0)
+ return buf
+
+
+def _write_temp_pdf(tmp_dir, pagesize=letter, num_pages=1, filename="test.pdf"):
+ """Write a minimal PDF to *tmp_dir* and return its path."""
+ path = os.path.join(tmp_dir, filename)
+ buf = _make_simple_pdf(pagesize=pagesize, num_pages=num_pages)
+ with open(path, "wb") as f:
+ f.write(buf.read())
+ return path
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: _compute_logo_rect
+# ---------------------------------------------------------------------------
+
+class TestComputeLogoRect(unittest.TestCase):
+
+ def test_top_right(self):
+ x, y = _compute_logo_rect(612, 792, "top-right", 150, 40, margin=20)
+ self.assertAlmostEqual(x, 612 - 150 - 20)
+ self.assertAlmostEqual(y, 792 - 40 - 20)
+
+ def test_top_left(self):
+ x, y = _compute_logo_rect(612, 792, "top-left", 150, 40, margin=20)
+ self.assertAlmostEqual(x, 20)
+ self.assertAlmostEqual(y, 792 - 40 - 20)
+
+ def test_bottom_right(self):
+ x, y = _compute_logo_rect(612, 792, "bottom-right", 150, 40, margin=20)
+ self.assertAlmostEqual(x, 612 - 150 - 20)
+ self.assertAlmostEqual(y, 20)
+
+ def test_bottom_left(self):
+ x, y = _compute_logo_rect(612, 792, "bottom-left", 150, 40, margin=20)
+ self.assertAlmostEqual(x, 20)
+ self.assertAlmostEqual(y, 20)
+
+ def test_unknown_position_defaults_to_top_right(self):
+ x, y = _compute_logo_rect(612, 792, "unknown", 150, 40, margin=20)
+ self.assertAlmostEqual(x, 612 - 150 - 20)
+ self.assertAlmostEqual(y, 792 - 40 - 20)
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: _get_logo_height
+# ---------------------------------------------------------------------------
+
+class TestGetLogoHeight(unittest.TestCase):
+
+ def test_with_default_logo(self):
+ h = _get_logo_height(_DEFAULT_LOGO, 200)
+ self.assertGreater(h, 0)
+
+ def test_fallback_on_missing_file(self):
+ h = _get_logo_height("/nonexistent/logo.png", 200)
+ # Fallback: width / 4.0
+ self.assertAlmostEqual(h, 50.0)
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: _build_xmp_packet
+# ---------------------------------------------------------------------------
+
+class TestBuildXmpPacket(unittest.TestCase):
+
+ def setUp(self):
+ self.doi = "10.1590/s0100-12345"
+ self.date_stamp = "2026-01-15"
+ self.xmp = _build_xmp_packet(self.doi, self.date_stamp).decode("utf-8")
+
+ def test_contains_xpacket_begin(self):
+ self.assertIn("{self.doi}", self.xmp)
+
+ def test_contains_prism_url(self):
+ self.assertIn(f"https://doi.org/{self.doi}", self.xmp)
+
+ def test_contains_crossmark_major_version_date(self):
+ self.assertIn(
+ f"{self.date_stamp}",
+ self.xmp,
+ )
+
+ def test_contains_crossmark_doi(self):
+ self.assertIn(f"{self.doi}", self.xmp)
+
+ def test_contains_pdfx_doi(self):
+ self.assertIn(f"{self.doi}", self.xmp)
+
+ def test_contains_pdfx_crossmark_date(self):
+ self.assertIn(
+ f"{self.date_stamp}",
+ self.xmp,
+ )
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: _merge_xmp_packet
+# ---------------------------------------------------------------------------
+
+class TestMergeXmpPacket(unittest.TestCase):
+
+ def test_merge_with_no_existing_returns_fresh_packet(self):
+ result = _merge_xmp_packet(None, "10.1234/test", "2026-01-01")
+ self.assertIn("10.1234/test", result.decode("utf-8"))
+
+ def test_merge_with_invalid_xml_returns_fresh_packet(self):
+ result = _merge_xmp_packet(b">>", "10.1234/test", "2026-01-01")
+ self.assertIn("10.1234/test", result.decode("utf-8"))
+
+ def test_merge_preserves_existing_fields(self):
+ existing = b"""
+
+
+
+ My Article
+
+
+
+"""
+ result = _merge_xmp_packet(existing, "10.1590/abc", "2026-02-01")
+ text = result.decode("utf-8")
+ # Existing field preserved
+ self.assertIn("My Article", text)
+ # New fields added
+ self.assertIn("10.1590/abc", text)
+
+ def test_merge_updates_existing_crossmark_field(self):
+ existing = b"""
+
+
+
+ 10.OLD/doi
+
+
+
+"""
+ result = _merge_xmp_packet(existing, "10.NEW/doi", "2026-01-01")
+ text = result.decode("utf-8")
+ self.assertIn("10.NEW/doi", text)
+ self.assertNotIn("10.OLD/doi", text)
+
+
+# ---------------------------------------------------------------------------
+# Integration tests: add_crossmark
+# ---------------------------------------------------------------------------
+
+class TestAddCrossmark(unittest.TestCase):
+
+ DOI = "10.1590/s0100-12345"
+ DATE_STAMP = "2026-01-15"
+
+ def test_basic_single_page_pdf(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="input.pdf")
+ output_pdf = os.path.join(tmp, "output.pdf")
+
+ add_crossmark(input_pdf, output_pdf, self.DOI, self.DATE_STAMP)
+
+ self.assertTrue(os.path.exists(output_pdf))
+ self.assertGreater(os.path.getsize(output_pdf), 0)
+
+ def test_output_has_same_page_count(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, num_pages=3, filename="input.pdf")
+ output_pdf = os.path.join(tmp, "output.pdf")
+
+ add_crossmark(input_pdf, output_pdf, self.DOI, self.DATE_STAMP)
+
+ reader = PdfReader(output_pdf)
+ self.assertEqual(len(reader.pages), 3)
+
+ def test_output_contains_uri_annotation(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="input.pdf")
+ output_pdf = os.path.join(tmp, "output.pdf")
+
+ add_crossmark(input_pdf, output_pdf, self.DOI, self.DATE_STAMP)
+
+ reader = PdfReader(output_pdf)
+ first_page = reader.pages[0]
+ annots = first_page.get("/Annots")
+ self.assertIsNotNone(annots, "No annotations found on first page")
+
+ # Find the URI annotation with crossmark URL
+ found_uri = False
+ for annot_ref in annots:
+ annot = annot_ref.get_object()
+ action = annot.get("/A")
+ if action:
+ uri = str(action.get("/URI", ""))
+ if uri.startswith("https://crossmark.crossref.org/"):
+ found_uri = True
+ break
+ self.assertTrue(found_uri, "Crossmark URI annotation not found")
+
+ def test_crossmark_url_contains_doi(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="input.pdf")
+ output_pdf = os.path.join(tmp, "output.pdf")
+
+ add_crossmark(input_pdf, output_pdf, self.DOI, self.DATE_STAMP)
+
+ reader = PdfReader(output_pdf)
+ first_page = reader.pages[0]
+ annots = first_page.get("/Annots")
+
+ for annot_ref in annots:
+ annot = annot_ref.get_object()
+ action = annot.get("/A")
+ if action:
+ uri = str(action.get("/URI", ""))
+ if uri.startswith("https://crossmark.crossref.org/"):
+ self.assertIn(self.DOI, uri)
+ self.assertIn(self.DATE_STAMP, uri)
+ return
+
+ self.fail("Crossmark URI annotation not found")
+
+ def test_output_contains_xmp_metadata(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="input.pdf")
+ output_pdf = os.path.join(tmp, "output.pdf")
+
+ add_crossmark(input_pdf, output_pdf, self.DOI, self.DATE_STAMP)
+
+ reader = PdfReader(output_pdf)
+ meta_ref = reader.root_object.get("/Metadata")
+ self.assertIsNotNone(meta_ref, "XMP metadata stream not found in PDF")
+
+ xmp_text = meta_ref.get_object().get_data().decode("utf-8")
+ self.assertIn(self.DOI, xmp_text)
+ self.assertIn(self.DATE_STAMP, xmp_text)
+
+ def test_xmp_contains_all_required_fields(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="input.pdf")
+ output_pdf = os.path.join(tmp, "output.pdf")
+
+ add_crossmark(input_pdf, output_pdf, self.DOI, self.DATE_STAMP)
+
+ reader = PdfReader(output_pdf)
+ xmp_text = (
+ reader.root_object["/Metadata"].get_object().get_data().decode("utf-8")
+ )
+
+ expected_fragments = [
+ f"doi:{self.DOI}", # dc:identifier
+ f"{self.DOI}",
+ f"https://doi.org/{self.DOI}", # prism:url
+ f"{self.DOI}",
+ f"{self.DOI}",
+ self.DATE_STAMP,
+ ]
+ for fragment in expected_fragments:
+ self.assertIn(fragment, xmp_text, f"Missing fragment: {fragment}")
+
+ def test_a4_pdf(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, pagesize=A4, filename="a4.pdf")
+ output_pdf = os.path.join(tmp, "a4_cm.pdf")
+
+ add_crossmark(input_pdf, output_pdf, self.DOI, self.DATE_STAMP)
+
+ reader = PdfReader(output_pdf)
+ self.assertEqual(len(reader.pages), 1)
+
+ def test_position_top_left(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="input.pdf")
+ output_pdf = os.path.join(tmp, "output.pdf")
+
+ add_crossmark(
+ input_pdf, output_pdf, self.DOI, self.DATE_STAMP, position="top-left"
+ )
+ self.assertTrue(os.path.exists(output_pdf))
+
+ def test_custom_width(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="input.pdf")
+ output_pdf = os.path.join(tmp, "output.pdf")
+
+ add_crossmark(
+ input_pdf, output_pdf, self.DOI, self.DATE_STAMP, width=100
+ )
+ self.assertTrue(os.path.exists(output_pdf))
+
+ def test_missing_input_raises(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ with self.assertRaises(FileNotFoundError):
+ add_crossmark(
+ "/nonexistent/input.pdf",
+ os.path.join(tmp, "out.pdf"),
+ self.DOI,
+ self.DATE_STAMP,
+ )
+
+ def test_missing_logo_raises(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="input.pdf")
+ with self.assertRaises(FileNotFoundError):
+ add_crossmark(
+ input_pdf,
+ os.path.join(tmp, "out.pdf"),
+ self.DOI,
+ self.DATE_STAMP,
+ logo_path="/nonexistent/logo.png",
+ )
+
+ def test_empty_doi_raises(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="input.pdf")
+ with self.assertRaises(ValueError):
+ add_crossmark(
+ input_pdf,
+ os.path.join(tmp, "out.pdf"),
+ "",
+ self.DATE_STAMP,
+ )
+
+ def test_empty_date_stamp_raises(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="input.pdf")
+ with self.assertRaises(ValueError):
+ add_crossmark(
+ input_pdf,
+ os.path.join(tmp, "out.pdf"),
+ self.DOI,
+ "",
+ )
+
+ def test_output_directory_created_if_missing(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="input.pdf")
+ output_pdf = os.path.join(tmp, "subdir", "output.pdf")
+
+ add_crossmark(input_pdf, output_pdf, self.DOI, self.DATE_STAMP)
+ self.assertTrue(os.path.exists(output_pdf))
+
+
+# ---------------------------------------------------------------------------
+# CLI tests
+# ---------------------------------------------------------------------------
+
+class TestCLIMain(unittest.TestCase):
+
+ DOI = "10.1590/s0100-12345"
+ DATE_STAMP = "2026-01-15"
+
+ def test_single_file_mode(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="input.pdf")
+ output_pdf = os.path.join(tmp, "output.pdf")
+
+ import sys
+ from packtools.sps.formats.pdf.crossmark import main
+
+ with patch.object(
+ sys,
+ "argv",
+ [
+ "crossmark_pdf",
+ "--input", input_pdf,
+ "--output", output_pdf,
+ "--doi", self.DOI,
+ "--date-stamp", self.DATE_STAMP,
+ ],
+ ):
+ main()
+
+ self.assertTrue(os.path.exists(output_pdf))
+
+ def test_single_file_default_output_name(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input_pdf = _write_temp_pdf(tmp, filename="article.pdf")
+ expected_output = os.path.join(tmp, "article_cm.pdf")
+
+ import sys
+ from packtools.sps.formats.pdf.crossmark import main
+
+ with patch.object(
+ sys,
+ "argv",
+ [
+ "crossmark_pdf",
+ "--input", input_pdf,
+ "--doi", self.DOI,
+ "--date-stamp", self.DATE_STAMP,
+ ],
+ ):
+ main()
+
+ self.assertTrue(os.path.exists(expected_output))
+
+ def test_batch_csv_mode(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input1 = _write_temp_pdf(tmp, filename="a1.pdf")
+ input2 = _write_temp_pdf(tmp, filename="a2.pdf")
+ out1 = os.path.join(tmp, "a1_cm.pdf")
+ out2 = os.path.join(tmp, "a2_cm.pdf")
+
+ csv_path = os.path.join(tmp, "batch.csv")
+ with open(csv_path, "w", newline="") as f:
+ w = csv.writer(f)
+ w.writerow(["doi", "input_pdf", "date_stamp", "output_pdf"])
+ w.writerow([self.DOI, input1, self.DATE_STAMP, out1])
+ w.writerow(["10.1590/other", input2, "2025-06-01", out2])
+
+ import sys
+ from packtools.sps.formats.pdf.crossmark import main
+
+ with patch.object(sys, "argv", ["crossmark_pdf", "--csv", csv_path]):
+ main()
+
+ self.assertTrue(os.path.exists(out1))
+ self.assertTrue(os.path.exists(out2))
+
+ def test_batch_csv_mode_default_output_name(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ input1 = _write_temp_pdf(tmp, filename="a1.pdf")
+ expected_out = os.path.join(tmp, "a1_cm.pdf")
+
+ csv_path = os.path.join(tmp, "batch.csv")
+ with open(csv_path, "w", newline="") as f:
+ w = csv.writer(f)
+ w.writerow(["doi", "input_pdf", "date_stamp"])
+ w.writerow([self.DOI, input1, self.DATE_STAMP])
+
+ import sys
+ from packtools.sps.formats.pdf.crossmark import main
+
+ with patch.object(sys, "argv", ["crossmark_pdf", "--csv", csv_path]):
+ main()
+
+ self.assertTrue(os.path.exists(expected_out))
+
+ def test_batch_csv_skips_rows_with_missing_fields(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ csv_path = os.path.join(tmp, "batch.csv")
+ with open(csv_path, "w", newline="") as f:
+ w = csv.writer(f)
+ w.writerow(["doi", "input_pdf", "date_stamp"])
+ # Row with missing doi -> should be skipped
+ w.writerow(["", os.path.join(tmp, "a1.pdf"), self.DATE_STAMP])
+
+ import sys
+ from packtools.sps.formats.pdf.crossmark import main
+
+ # Should not raise
+ with patch.object(sys, "argv", ["crossmark_pdf", "--csv", csv_path]):
+ main()
+
+
+if __name__ == "__main__":
+ unittest.main()