diff --git a/packtools/sps/validation/history.py b/packtools/sps/validation/history.py
new file mode 100644
index 000000000..68d04558e
--- /dev/null
+++ b/packtools/sps/validation/history.py
@@ -0,0 +1,531 @@
+"""
+Validations for the <history> element according to SPS 1.10 specification.
+
+This module implements validations for the <history> element, which groups
+historical dates for documents (received, accepted, revised, preprint, corrections,
+retractions, etc.).
+
+Reference: https://docs.google.com/document/d/1GTv4Inc2LS_AXY-ToHT3HmO66UT0VAHWJNOIqzBNSgA/edit?tab=t.0#heading=h.history
+"""
+
+from packtools.sps.validation.utils import build_response
+
+
+# Allowed values for @date-type according to SPS 1.10
+ALLOWED_DATE_TYPES = [
+    "received",  # Date manuscript was received
+    "accepted",  # Date manuscript was accepted
+    "corrected",  # Date of approval of Errata or Addendum
+    "expression-of-concern",  # Date of approval of Expression of Concern
+    "pub",  # Publication date
+    "preprint",  # Date published as preprint
+    "resubmitted",  # Date manuscript was resubmitted
+    "retracted",  # Date of approval of retraction
+    "rev-recd",  # Date revised manuscript was received
+    "rev-request",  # Date revisions were requested
+    "reviewer-report-received",  # Date reviewer report was received (exclusive for @article-type="reviewer-report")
+]
+
+# Date types that require complete date (day, month, year)
+COMPLETE_DATE_REQUIRED_TYPES = [
+    "received",
+    "accepted",
+    "corrected",
+    "retracted",
+    "expression-of-concern",
+]
+
+# Article types that are exempt from received/accepted requirements
+EXEMPT_ARTICLE_TYPES = [
+    "correction",  # errata
+    "retraction",
+    "addendum",
+    "expression-of-concern",
+    "reviewer-report",
+]
+
+XML_LANG = "{http://www.w3.org/XML/1998/namespace}lang"
+
+
+def _has_text(value):
+    """Return True when *value* is a string with non-whitespace content."""
+    return value is not None and value.strip() != ""
+
+
+class HistoryValidation:
+    """
+    Validates the <history> element according to SPS 1.10 rules.
+
+    Each validation method iterates independently over every relevant container
+    (<article-meta> for the main article and each <front-stub> for sub-articles),
+    so that exemption logic, uniqueness checks, and required-date checks are all
+    evaluated in the correct per-document scope.
+
+    Validation rules:
+    - Uniqueness of <history> element (per container)
+    - Presence of @date-type attribute
+    - Allowed values for @date-type
+    - Required dates (driven by ``date_list`` in the rules JSON) with exceptions
+      per container article-type
+    - Complete date requirements for critical date types
+    - Minimum year requirement for all dates
+    """
+
+    def __init__(self, xmltree, params=None):
+        """
+        Initialize HistoryValidation.
+
+        Args:
+            xmltree: XML tree containing the article
+            params: Optional dictionary of validation parameters. It is
+                copied, so the caller's dictionary is never modified.
+
+                When provided by the orchestrator via ``xml_validations.py``,
+                this dict is the content of the ``history_dates_rules`` key
+                from the pipeline configuration JSON, which has the shape::
+
+                    {
+                        "error_level": "CRITICAL",
+                        "date_list": [
+                            {"type": "received", "required": true},
+                            {"type": "accepted", "required": true},
+                            ...
+                        ]
+                    }
+
+                ``error_level`` is used as the default severity for every
+                validation rule. ``date_list`` drives which date types are
+                considered required (replaces the formerly hardcoded pair
+                received/accepted).
+
+                Individual rule levels can still be overridden with the
+                explicit keys below (all optional):
+                - history_uniqueness_error_level
+                - date_type_presence_error_level
+                - date_type_value_error_level
+                - required_date_error_level
+                - complete_date_error_level
+                - year_presence_error_level
+        """
+        self.xmltree = xmltree
+        # Shallow copy: the setdefault() calls below fill in per-rule levels
+        # and must not leak into the dictionary owned by the caller.
+        self.params = dict(params or {})
+
+        # Use the JSON's single error_level as the default for every rule;
+        # fall back to hard-coded defaults when the key is absent
+        # (ERROR for the uniqueness rule, CRITICAL for all the others).
+        default_error_level = self.params.get("error_level", "CRITICAL")
+        default_uniqueness_level = self.params.get("error_level", "ERROR")
+
+        self.params.setdefault("history_uniqueness_error_level", default_uniqueness_level)
+        self.params.setdefault("date_type_presence_error_level", default_error_level)
+        self.params.setdefault("date_type_value_error_level", default_error_level)
+        self.params.setdefault("required_date_error_level", default_error_level)
+        self.params.setdefault("complete_date_error_level", default_error_level)
+        self.params.setdefault("year_presence_error_level", default_error_level)
+
+        # Build the list of required date types from date_list.
+        # Falls back to received/accepted when no date_list is provided.
+        date_list = self.params.get("date_list", [])
+        if date_list:
+            self.required_date_types = [
+                d["type"] for d in date_list if d.get("required", False)
+            ]
+        else:
+            self.required_date_types = ["received", "accepted"]
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    def _get_contexts(self):
+        """
+        Yield one context dict per validatable container in the document.
+
+        Each context represents either:
+          - the main article's <article-meta>, or
+          - a <sub-article>'s <front-stub>.
+
+        Yields:
+            dict with keys:
+                container    - the article-meta or front-stub Element
+                article_type - @article-type of the owning article/sub-article
+                lang         - @xml:lang of the owning article/sub-article
+                parent       - "article" or "sub-article"
+                parent_id    - value of @id on <sub-article>, or None
+        """
+        root = self.xmltree.find(".")
+        if root is not None:
+            article_meta = root.find("front/article-meta")
+            if article_meta is not None:
+                yield {
+                    "container": article_meta,
+                    "article_type": root.get("article-type"),
+                    "lang": root.get(XML_LANG),
+                    "parent": "article",
+                    "parent_id": None,
+                }
+
+        for sub_article in self.xmltree.xpath(".//sub-article"):
+            front_stub = sub_article.find("front-stub")
+            if front_stub is not None:
+                yield {
+                    "container": front_stub,
+                    "article_type": sub_article.get("article-type"),
+                    "lang": sub_article.get(XML_LANG),
+                    "parent": "sub-article",
+                    "parent_id": sub_article.get("id"),
+                }
+
+    def _build_parent_info(self, ctx):
+        """
+        Build a parent information dict compatible with build_response()
+        from a context yielded by _get_contexts().
+
+        The ``parent`` field identifies the container document ("article" or
+        "sub-article"), matching the convention used by other validators such
+        as ArticleDoiValidation and XMLPeerReviewValidation.
+        """
+        return {
+            "parent": ctx["parent"],
+            "parent_id": ctx["parent_id"],
+            "parent_article_type": ctx["article_type"],
+            "parent_lang": ctx["lang"],
+        }
+
+    @staticmethod
+    def _read_date_parts(date_elem):
+        """Return the raw (day, month, year) texts of a <date>; None if absent."""
+        return (
+            date_elem.findtext("day"),
+            date_elem.findtext("month"),
+            date_elem.findtext("year"),
+        )
+
+    # ------------------------------------------------------------------
+    # Validation rules
+    # ------------------------------------------------------------------
+
+    def validate_history_uniqueness(self):
+        """
+        Rule 1: Validate that <history> appears at most once per container.
+
+        The <history> element must appear at most once inside <article-meta>
+        and at most once inside each <front-stub>. A valid document may have
+        one <history> in <article-meta> **and** one in each sub-article's
+        <front-stub> without triggering this rule.
+
+        Yields:
+            dict: Validation result (one per container)
+        """
+        for ctx in self._get_contexts():
+            container = ctx["container"]
+            parent = self._build_parent_info(ctx)
+
+            history_count = len(container.findall("history"))
+            is_valid = history_count <= 1
+
+            advice = None
+            if not is_valid:
+                advice = (
+                    "Remove duplicate <history> elements. "
+                    f"Found {history_count} occurrences, expected at most 1."
+                )
+
+            yield build_response(
+                title="history uniqueness",
+                parent=parent,
+                item="history",
+                sub_item=None,
+                validation_type="uniqueness",
+                is_valid=is_valid,
+                expected="at most one <history> element",
+                obtained=f"{history_count} <history> element(s)",
+                advice=advice,
+                data={"history_count": history_count},
+                error_level=self.params["history_uniqueness_error_level"],
+            )
+
+    def validate_date_type_presence(self):
+        """
+        Rule 2: Validate that all <date> elements within <history> have @date-type.
+
+        The @date-type attribute is required for all <date> elements within
+        <history>. Validation is scoped per container.
+
+        Yields:
+            dict: Validation result for each <date> element
+        """
+        for ctx in self._get_contexts():
+            container = ctx["container"]
+            parent = self._build_parent_info(ctx)
+
+            for date_elem in container.findall("history/date"):
+                date_type = date_elem.get("date-type")
+                has_date_type = _has_text(date_type)
+
+                day, month, year = self._read_date_parts(date_elem)
+                date_parts = {"day": day, "month": month, "year": year}
+
+                advice = None
+                if not has_date_type:
+                    advice = (
+                        "Add @date-type attribute to <date> element. "
+                        f"Date parts: {date_parts}"
+                    )
+
+                yield build_response(
+                    title="date-type presence",
+                    parent=parent,
+                    item="date",
+                    sub_item="@date-type",
+                    validation_type="exist",
+                    is_valid=has_date_type,
+                    expected="@date-type attribute present",
+                    obtained=date_type if has_date_type else "missing",
+                    advice=advice,
+                    data=date_parts,
+                    error_level=self.params["date_type_presence_error_level"],
+                )
+
+    def validate_date_type_values(self):
+        """
+        Rule 3: Validate that @date-type has allowed values.
+
+        The @date-type attribute must have one of the allowed values according
+        to SPS 1.10. Dates without @date-type are skipped here (covered by
+        Rule 2). Validation is scoped per container.
+
+        Yields:
+            dict: Validation result for each <date> element
+        """
+        for ctx in self._get_contexts():
+            container = ctx["container"]
+            parent = self._build_parent_info(ctx)
+
+            for date_elem in container.findall("history/date"):
+                date_type = date_elem.get("date-type")
+
+                # Missing/blank @date-type is reported by Rule 2.
+                if not _has_text(date_type):
+                    continue
+
+                is_valid = date_type in ALLOWED_DATE_TYPES
+
+                day, month, year = self._read_date_parts(date_elem)
+                date_parts = {
+                    "day": day,
+                    "month": month,
+                    "year": year,
+                    "date-type": date_type,
+                }
+
+                advice = None
+                if not is_valid:
+                    advice = (
+                        f"Change @date-type='{date_type}' to one of the "
+                        f"allowed values: {', '.join(ALLOWED_DATE_TYPES)}"
+                    )
+
+                yield build_response(
+                    title="date-type value",
+                    parent=parent,
+                    item="date",
+                    sub_item="@date-type",
+                    validation_type="value in list",
+                    is_valid=is_valid,
+                    expected=ALLOWED_DATE_TYPES,
+                    obtained=date_type,
+                    advice=advice,
+                    data=date_parts,
+                    error_level=self.params["date_type_value_error_level"],
+                )
+
+    def validate_required_dates(self):
+        """
+        Validate presence of required dates.
+
+        Which date types are required is driven by ``date_list`` in the
+        ``history_dates_rules`` JSON configuration (entries with
+        ``"required": true``). When no configuration is provided the
+        validator falls back to requiring ``received`` and ``accepted``.
+
+        Exempt article types (EXEMPT_ARTICLE_TYPES) are never required to
+        carry any of these dates. Each container is evaluated independently
+        using *its own* @article-type, so a ``reviewer-report`` sub-article
+        is correctly exempt even when the parent article is a
+        ``research-article``.
+
+        Yields:
+            dict: One validation result per (container, required_date_type)
+        """
+        for ctx in self._get_contexts():
+            container = ctx["container"]
+            article_type = ctx["article_type"]
+            parent = self._build_parent_info(ctx)
+
+            is_exempt = article_type in EXEMPT_ARTICLE_TYPES
+            # Exemption depends only on the container, not on the date type.
+            date_required = not is_exempt
+
+            found_date_types = [
+                d.get("date-type")
+                for d in container.findall("history/date")
+                if d.get("date-type")
+            ]
+
+            for required_type in self.required_date_types:
+                has_date = required_type in found_date_types
+                is_valid = has_date or not date_required
+
+                advice = None
+                if not is_valid:
+                    advice = (
+                        f'Add <date date-type="{required_type}"> '
+                        "to <history>"
+                    )
+
+                yield build_response(
+                    title=f"required date: {required_type}",
+                    parent=parent,
+                    item="history",
+                    sub_item=f"date[@date-type='{required_type}']",
+                    validation_type="exist",
+                    is_valid=is_valid,
+                    expected=(
+                        f'<date date-type="{required_type}"> present'
+                        if date_required
+                        else "not required (exempt article type)"
+                    ),
+                    obtained="present" if has_date else "missing",
+                    advice=advice,
+                    data={
+                        "article_type": article_type,
+                        "is_exempt": is_exempt,
+                        "found_date_types": found_date_types,
+                    },
+                    error_level=(
+                        self.params["required_date_error_level"]
+                        if date_required
+                        else "OK"
+                    ),
+                )
+
+    def validate_complete_date_for_critical_types(self):
+        """
+        Rule 6: Validate complete dates for critical date types.
+
+        For received, accepted, corrected, retracted, expression-of-concern:
+        <day>, <month>, and <year> are required. Validation is scoped per
+        container.
+
+        Yields:
+            dict: Validation result for each critical date
+        """
+        for ctx in self._get_contexts():
+            container = ctx["container"]
+            parent = self._build_parent_info(ctx)
+
+            for date_elem in container.findall("history/date"):
+                date_type = date_elem.get("date-type")
+
+                if date_type not in COMPLETE_DATE_REQUIRED_TYPES:
+                    continue
+
+                day, month, year = self._read_date_parts(date_elem)
+                date_parts = {
+                    "day": day,
+                    "month": month,
+                    "year": year,
+                    "date-type": date_type,
+                }
+
+                # Keep the day/month/year order so the advice reads naturally.
+                missing_parts = [
+                    name
+                    for name in ("day", "month", "year")
+                    if not _has_text(date_parts[name])
+                ]
+                is_complete = not missing_parts
+
+                advice = None
+                if not is_complete:
+                    advice = (
+                        f'Add missing elements to <date date-type="{date_type}">: '
+                        f"{', '.join(missing_parts)}"
+                    )
+
+                yield build_response(
+                    title=f"complete date for {date_type}",
+                    parent=parent,
+                    item="date",
+                    sub_item=f"@date-type='{date_type}'",
+                    validation_type="format",
+                    is_valid=is_complete,
+                    expected="complete date with day, month, and year",
+                    obtained=f"day={day}, month={month}, year={year}",
+                    advice=advice,
+                    data=date_parts,
+                    error_level=self.params["complete_date_error_level"],
+                )
+
+    def validate_year_presence(self):
+        """
+        Rule 7: Validate that all dates have at least <year>.
+
+        For all date types, at least <year> must be present. Validation is
+        scoped per container.
+
+        Yields:
+            dict: Validation result for each date
+        """
+        for ctx in self._get_contexts():
+            container = ctx["container"]
+            parent = self._build_parent_info(ctx)
+
+            for date_elem in container.findall("history/date"):
+                date_type = date_elem.get("date-type")
+                day, month, year = self._read_date_parts(date_elem)
+                has_year = _has_text(year)
+
+                date_parts = {
+                    "day": day,
+                    "month": month,
+                    "year": year,
+                    "date-type": date_type,
+                }
+
+                advice = None
+                if not has_year:
+                    advice = (
+                        f'Add <year> element to <date date-type="{date_type}">'
+                    )
+
+                yield build_response(
+                    title=f"year presence for {date_type}",
+                    parent=parent,
+                    item="date",
+                    sub_item="year",
+                    validation_type="exist",
+                    is_valid=has_year,
+                    expected="<year> element present",
+                    obtained=year if has_year else "missing",
+                    advice=advice,
+                    data=date_parts,
+                    error_level=self.params["year_presence_error_level"],
+                )
+
+    def validate(self):
+        """
+        Perform all history validations.
+
+        Yields:
+            Generator of validation results for all checks
+        """
+        yield from self.validate_history_uniqueness()
+        yield from self.validate_date_type_presence()
+        yield from self.validate_date_type_values()
+        yield from self.validate_required_dates()
+        yield from self.validate_complete_date_for_critical_types()
+        yield from self.validate_year_presence()
diff --git a/packtools/sps/validation/xml_validations.py b/packtools/sps/validation/xml_validations.py index 460750cf3..0f7ea5a69 100644 --- a/packtools/sps/validation/xml_validations.py +++ b/packtools/sps/validation/xml_validations.py @@ -45,6 +45,7 @@ from packtools.sps.validation.app_group import AppValidation from packtools.sps.validation.supplementary_material import XmlSupplementaryMaterialValidation +from packtools.sps.validation.history import HistoryValidation from packtools.sps.validation.ext_link import ExtLinkValidation @@ -306,6 +307,13 @@ def validate_supplementary_materials(xmltree, params): yield from
validator.validate() +def validate_history(xmltree, params): + """Validate the element according to SPS 1.10 rules.""" + rules = {} + rules.update(params.get("history_dates_rules", {})) + validator = HistoryValidation(xmltree, rules) + yield from validator.validate() + def validate_ext_links(xmltree, params): """ Validates ext-link elements according to SPS 1.10 specification. diff --git a/packtools/sps/validation/xml_validator.py b/packtools/sps/validation/xml_validator.py index e3ccfe273..4c7ab76d8 100644 --- a/packtools/sps/validation/xml_validator.py +++ b/packtools/sps/validation/xml_validator.py @@ -56,6 +56,10 @@ def validate_xml_content(xmltree, rules): "group": "article dates", "items": xml_validations.validate_article_dates(xmltree, params), } + yield { + "group": "history", + "items": xml_validations.validate_history(xmltree, params), + } yield { "group": "article languages", "items": xml_validations.validate_article_languages(xmltree, params), diff --git a/tests/sps/validation/test_history.py b/tests/sps/validation/test_history.py new file mode 100644 index 000000000..6b8e12ff5 --- /dev/null +++ b/tests/sps/validation/test_history.py @@ -0,0 +1,996 @@ +""" +Tests for history element validations according to SPS 1.10. + +This module tests the validation rules for the element, +ensuring compliance with the SPS 1.10 specification. +""" + +from unittest import TestCase +from lxml import etree + +from packtools.sps.validation.history import ( + HistoryValidation, + ALLOWED_DATE_TYPES, + COMPLETE_DATE_REQUIRED_TYPES, + EXEMPT_ARTICLE_TYPES, +) + + +class TestHistoryUniqueness(TestCase): + """Tests for Rule 1: History element uniqueness.""" + + def test_single_history_in_article_meta(self): + """Test that a single in article-meta is valid.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_history_uniqueness()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + self.assertEqual(results[0]["title"], "history uniqueness") + + def test_single_history_in_front_stub(self): + """Test that a single in front-stub is valid.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_history_uniqueness()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + + def test_multiple_history_elements(self): + """Test that multiple elements are invalid.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + + + 20 + 04 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_history_uniqueness()) + + self.assertEqual(len(results), 1) + self.assertNotEqual(results[0]["response"], "OK") + self.assertEqual(results[0]["response"], "ERROR") + self.assertIn("duplicate", results[0]["advice"].lower()) + self.assertIn("2", results[0]["got_value"]) + + def test_no_history_element(self): + """Test that no element is valid.""" + xml = """ +
+ + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_history_uniqueness()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + + +class TestDateTypePresence(TestCase): + """Tests for Rule 2: @date-type attribute presence.""" + + def test_date_with_date_type(self): + """Test that with @date-type is valid.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_date_type_presence()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + self.assertEqual(results[0]["got_value"], "received") + + def test_date_without_date_type(self): + """Test that without @date-type is invalid.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_date_type_presence()) + + self.assertEqual(len(results), 1) + self.assertNotEqual(results[0]["response"], "OK") + self.assertEqual(results[0]["response"], "CRITICAL") + self.assertEqual(results[0]["got_value"], "missing") + self.assertIn("Add @date-type", results[0]["advice"]) + + def test_date_with_empty_date_type(self): + """Test that with empty @date-type is invalid.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_date_type_presence()) + + self.assertEqual(len(results), 1) + self.assertNotEqual(results[0]["response"], "OK") + self.assertEqual(results[0]["response"], "CRITICAL") + + def test_multiple_dates_mixed_presence(self): + """Test validation of multiple dates with mixed @date-type presence.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + 20 + 04 + 2024 + + + 25 + 05 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_date_type_presence()) + + self.assertEqual(len(results), 3) + # First and third should be valid + self.assertEqual(results[0]["response"], "OK") + self.assertNotEqual(results[1]["response"], "OK") + self.assertEqual(results[2]["response"], "OK") + + +class TestDateTypeValues(TestCase): + """Tests for Rule 3: Allowed @date-type values.""" + + def test_valid_date_types(self): + """Test that all allowed date types are valid.""" + for date_type in ALLOWED_DATE_TYPES: + xml = f""" +
+ + + + + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_date_type_values()) + + self.assertEqual(len(results), 1, f"Failed for date-type={date_type}") + self.assertEqual(results[0]["response"], "OK", f"Failed for date-type={date_type}") + self.assertEqual(results[0]["got_value"], date_type) + + def test_invalid_date_type(self): + """Test that invalid date types are rejected.""" + xml = """ +
+ + + + + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_date_type_values()) + + self.assertEqual(len(results), 1) + self.assertNotEqual(results[0]["response"], "OK") + self.assertEqual(results[0]["response"], "CRITICAL") + self.assertEqual(results[0]["got_value"], "invalid-type") + self.assertIn("allowed values", results[0]["advice"]) + + def test_multiple_dates_mixed_validity(self): + """Test validation with both valid and invalid date types.""" + xml = """ +
+ + + + + 2024 + + + 2024 + + + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_date_type_values()) + + self.assertEqual(len(results), 3) + self.assertEqual(results[0]["response"], "OK") # received + self.assertNotEqual(results[1]["response"], "OK") # bad-type + self.assertEqual(results[2]["response"], "OK") # accepted + + +class TestRequiredDates(TestCase): + """Tests for Rules 4 & 5: Required dates (received, accepted).""" + + def test_regular_article_with_required_dates(self): + """Test that regular articles require received and accepted dates.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + 20 + 05 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_required_dates()) + + # Should have 2 results: one for received, one for accepted + self.assertEqual(len(results), 2) + # Both should be valid + self.assertTrue(all(r["response"] == "OK" for r in results)) + + def test_regular_article_missing_received(self): + """Test that regular articles without received date are invalid.""" + xml = """ +
+ + + + + 20 + 05 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_required_dates()) + + self.assertEqual(len(results), 2) + # received should be invalid + received_result = next(r for r in results if "received" in r["title"]) + self.assertNotEqual(received_result["response"], "OK") + self.assertEqual(received_result["response"], "CRITICAL") + self.assertIn("Add ", received_result["advice"]) + # accepted should be valid + accepted_result = next(r for r in results if "accepted" in r["title"]) + self.assertEqual(accepted_result["response"], "OK") + + def test_regular_article_missing_accepted(self): + """Test that regular articles without accepted date are invalid.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_required_dates()) + + self.assertEqual(len(results), 2) + # received should be valid + received_result = next(r for r in results if "received" in r["title"]) + self.assertEqual(received_result["response"], "OK") + # accepted should be invalid + accepted_result = next(r for r in results if "accepted" in r["title"]) + self.assertNotEqual(accepted_result["response"], "OK") + self.assertEqual(accepted_result["response"], "CRITICAL") + + def test_exempt_article_types(self): + """Test that exempt article types don't require received/accepted.""" + for article_type in EXEMPT_ARTICLE_TYPES: + xml = f""" +
+ + + + + 15 + 03 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_required_dates()) + + # Should have 2 results but both should be valid (not required) + self.assertEqual(len(results), 2, f"Failed for article-type={article_type}") + self.assertTrue(all(r["response"] == "OK" for r in results), f"Failed for article-type={article_type}") + + def test_retraction_without_required_dates(self): + """Test specific case: retraction article type.""" + xml = """ +
+ + + + + 20 + 06 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_required_dates()) + + self.assertEqual(len(results), 2) + # Both should be valid since retraction is exempt + self.assertTrue(all(r["response"] == "OK" for r in results)) + + +class TestCompleteDateForCriticalTypes(TestCase): + """Tests for Rule 6: Complete date requirements for critical types.""" + + def test_received_with_complete_date(self): + """Test that received date with complete date is valid.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_complete_date_for_critical_types()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + + def test_received_missing_day(self): + """Test that received date without day is invalid.""" + xml = """ +
+ + + + + 03 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_complete_date_for_critical_types()) + + self.assertEqual(len(results), 1) + self.assertNotEqual(results[0]["response"], "OK") + self.assertEqual(results[0]["response"], "CRITICAL") + self.assertIn("day", results[0]["advice"]) + + def test_accepted_missing_month(self): + """Test that accepted date without month is invalid.""" + xml = """ +
+ + + + + 15 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_complete_date_for_critical_types()) + + self.assertEqual(len(results), 1) + self.assertNotEqual(results[0]["response"], "OK") + self.assertEqual(results[0]["response"], "CRITICAL") + self.assertIn("month", results[0]["advice"]) + + def test_corrected_missing_year(self): + """Test that corrected date without year is invalid.""" + xml = """ +
+ + + + + 15 + 03 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_complete_date_for_critical_types()) + + self.assertEqual(len(results), 1) + self.assertNotEqual(results[0]["response"], "OK") + self.assertEqual(results[0]["response"], "CRITICAL") + self.assertIn("year", results[0]["advice"]) + + def test_all_critical_types(self): + """Test that all critical types are validated for completeness.""" + for date_type in COMPLETE_DATE_REQUIRED_TYPES: + xml = f""" +
+ + + + + 15 + 03 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_complete_date_for_critical_types()) + + self.assertEqual(len(results), 1, f"Failed for date-type={date_type}") + self.assertEqual(results[0]["response"], "OK", f"Failed for date-type={date_type}") + + def test_non_critical_type_not_validated(self): + """Test that non-critical types are not validated by this rule.""" + xml = """ +
+ + + + + 2023 + + + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_complete_date_for_critical_types()) + + # Should not return any results for non-critical types + self.assertEqual(len(results), 0) + + def test_multiple_critical_dates_mixed(self): + """Test validation of multiple critical dates with mixed completeness.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + 05 + 2024 + + + 10 + 07 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_complete_date_for_critical_types()) + + self.assertEqual(len(results), 3) + # received should be valid + self.assertEqual(results[0]["response"], "OK") + # accepted should be invalid (missing day) + self.assertNotEqual(results[1]["response"], "OK") + # corrected should be valid + self.assertEqual(results[2]["response"], "OK") + + +class TestYearPresence(TestCase): + """Tests for Rule 7: Year presence for all dates.""" + + def test_date_with_year(self): + """Test that date with year is valid.""" + xml = """ +
+ + + + + 2023 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_year_presence()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + self.assertEqual(results[0]["got_value"], "2023") + + def test_date_without_year(self): + """Test that date without year is invalid.""" + xml = """ +
+ + + + + 09 + 21 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_year_presence()) + + self.assertEqual(len(results), 1) + self.assertNotEqual(results[0]["response"], "OK") + self.assertEqual(results[0]["response"], "CRITICAL") + self.assertEqual(results[0]["got_value"], "missing") + self.assertIn("Add ", results[0]["advice"]) + + def test_date_with_empty_year(self): + """Test that date with empty year is invalid.""" + xml = """ +
+ + + + + + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_year_presence()) + + self.assertEqual(len(results), 1) + self.assertNotEqual(results[0]["response"], "OK") + self.assertEqual(results[0]["response"], "CRITICAL") + + def test_multiple_dates_mixed_year_presence(self): + """Test validation of multiple dates with mixed year presence.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + 20 + 05 + + + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate_year_presence()) + + self.assertEqual(len(results), 3) + # received should be valid + self.assertEqual(results[0]["response"], "OK") + # accepted should be invalid (missing year) + self.assertNotEqual(results[1]["response"], "OK") + # pub should be valid + self.assertEqual(results[2]["response"], "OK") + + +class TestFullValidation(TestCase): + """Tests for complete validation workflow.""" + + def test_valid_complete_example(self): + """Test validation of a completely valid history.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + 12 + 05 + 2024 + + + 21 + 09 + 2023 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate()) + + # All results should be valid + errors = [r for r in results if r["response"] != "OK"] + self.assertEqual(len(errors), 0, f"Found errors: {errors}") + + def test_invalid_multiple_issues(self): + """Test validation with multiple issues.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + 05 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate()) + + # Should have multiple errors + errors = [r for r in results if r["response"] != "OK"] + self.assertGreater(len(errors), 0) + + # Check for specific error types + error_titles = [e["title"] for e in errors] + self.assertIn("date-type presence", error_titles) + self.assertIn("date-type value", error_titles) + self.assertIn("required date: received", error_titles) + self.assertIn("required date: accepted", error_titles) + + def test_retraction_article_valid(self): + """Test validation of retraction article (exempt from received/accepted).""" + xml = """ +
+ + + + + 20 + 06 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + results = list(validator.validate()) + + # All results should be valid (retraction is exempt) + errors = [r for r in results if r["response"] != "OK"] + self.assertEqual(len(errors), 0, f"Found errors: {errors}") + + +class TestHistoryDateRulesJsonSchema(TestCase): + """Tests for consumption of history_dates_rules JSON configuration schema.""" + + RULES = { + "error_level": "CRITICAL", + "date_list": [ + {"type": "preprint", "required": False}, + {"type": "received", "required": True}, + {"type": "resubmitted", "required": False}, + {"type": "rev-request", "required": True}, + {"type": "rev-recd", "required": True}, + {"type": "accepted", "required": True}, + {"type": "pub", "required": True}, + {"type": "corrected", "required": False}, + {"type": "retracted", "required": False}, + ], + } + + def test_error_level_propagates_to_all_rules(self): + """error_level from JSON is used as default for every rule.""" + xml = "
" + tree = etree.fromstring(xml) + validator = HistoryValidation(tree, dict(self.RULES)) + + for key in ( + "date_type_presence_error_level", + "date_type_value_error_level", + "required_date_error_level", + "complete_date_error_level", + "year_presence_error_level", + ): + self.assertEqual(validator.params[key], "CRITICAL", f"Expected CRITICAL for {key}") + + def test_required_date_types_built_from_date_list(self): + """required_date_types is derived from date_list entries with required=True.""" + xml = "
" + tree = etree.fromstring(xml) + validator = HistoryValidation(tree, dict(self.RULES)) + + expected = {"received", "rev-request", "rev-recd", "accepted", "pub"} + self.assertEqual(set(validator.required_date_types), expected) + + def test_non_required_types_not_checked(self): + """date_list entries with required=False do not trigger required-date errors.""" + xml = "
" + tree = etree.fromstring(xml) + validator = HistoryValidation(tree, dict(self.RULES)) + + self.assertNotIn("preprint", validator.required_date_types) + self.assertNotIn("resubmitted", validator.required_date_types) + self.assertNotIn("corrected", validator.required_date_types) + self.assertNotIn("retracted", validator.required_date_types) + + def test_validate_required_dates_uses_date_list(self): + """validate_required_dates yields one result per required type from date_list.""" + xml = """ +
+ + + + + 10012024 + + + 20032024 + + + 01022024 + + + 15022024 + + + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree, dict(self.RULES)) + results = list(validator.validate_required_dates()) + + # Five required types → five results, all valid + self.assertEqual(len(results), 5) + self.assertTrue(all(r["response"] == "OK" for r in results)) + + def test_missing_required_date_from_date_list(self): + """Missing a date type flagged as required in date_list raises an error.""" + xml = """ +
+ + + + + 10012024 + + + + +
+ """ + tree = etree.fromstring(xml) + validator = HistoryValidation(tree, dict(self.RULES)) + results = list(validator.validate_required_dates()) + + errors = [r for r in results if r["response"] != "OK"] + error_titles = [e["title"] for e in errors] + + self.assertIn("required date: accepted", error_titles) + self.assertIn("required date: rev-request", error_titles) + self.assertIn("required date: rev-recd", error_titles) + self.assertIn("required date: pub", error_titles) + self.assertNotIn("required date: received", error_titles) + + def test_fallback_when_no_date_list(self): + """Without date_list, validator falls back to requiring received and accepted.""" + xml = "
" + tree = etree.fromstring(xml) + validator = HistoryValidation(tree) + + self.assertEqual(set(validator.required_date_types), {"received", "accepted"}) diff --git a/tests/sps/validation/test_history_integration.py b/tests/sps/validation/test_history_integration.py new file mode 100644 index 000000000..1047e6376 --- /dev/null +++ b/tests/sps/validation/test_history_integration.py @@ -0,0 +1,216 @@ +""" +Integration tests for history validation in the orchestrator. + +This module tests that the history validation is properly integrated +into the xml_validator orchestrator. +""" + +from unittest import TestCase +from lxml import etree + +from packtools.sps.validation.xml_validator import validate_xml_content +from packtools.sps.validation.xml_validator_rules import get_default_rules + + +class TestHistoryIntegration(TestCase): + """Tests for history validation integration in the orchestrator.""" + + def test_history_validation_group_exists(self): + """Test that the history validation group is present in orchestrator.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + 12 + 05 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + rules = get_default_rules() + + # Check that history group exists + groups = [] + for group_result in validate_xml_content(tree, rules): + groups.append(group_result['group']) + + self.assertIn('history', groups, + f"History group not found. Available groups: {groups}") + + def test_history_validation_with_valid_xml(self): + """Test that valid history XML passes validation. + + The XML must include all date types marked as required=true in + history_dates_rules.json: received, rev-request, rev-recd, accepted, pub. + """ + xml = """ +
+ + + + + 15 + 01 + 2024 + + + 01 + 02 + 2024 + + + 20 + 02 + 2024 + + + 12 + 03 + 2024 + + + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + rules = get_default_rules() + + # Get history validation results + for group_result in validate_xml_content(tree, rules): + if group_result['group'] == 'history': + items = list(group_result['items']) + errors = [item for item in items if item and item.get('response') != 'OK'] + + # Should have no errors + self.assertEqual(len(errors), 0, + f"Expected no errors, but found: {errors}") + # Should have some validations + self.assertGreater(len(items), 0, + "Should have at least one validation") + break + else: + self.fail("History validation group not found") + + def test_history_validation_with_invalid_xml(self): + """Test that invalid history XML is caught by validation.""" + xml = """ +
+ + + + + 2023 + + + + +
+ """ + tree = etree.fromstring(xml) + rules = get_default_rules() + + # Get history validation results + for group_result in validate_xml_content(tree, rules): + if group_result['group'] == 'history': + items = list(group_result['items']) + errors = [item for item in items if item and item.get('response') != 'OK'] + + # Should have errors for missing required dates + self.assertGreater(len(errors), 0, + "Expected errors for missing required dates") + + # Check for specific errors + error_titles = [err.get('title') for err in errors] + self.assertIn('required date: received', error_titles) + self.assertIn('required date: accepted', error_titles) + break + else: + self.fail("History validation group not found") + + def test_history_validation_with_multiple_history_elements(self): + """Test that multiple history elements are caught.""" + xml = """ +
+ + + + + 15 + 03 + 2024 + + + + + 12 + 05 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + rules = get_default_rules() + + # Get history validation results + for group_result in validate_xml_content(tree, rules): + if group_result['group'] == 'history': + items = list(group_result['items']) + errors = [item for item in items if item and item.get('response') != 'OK'] + + # Should have error for duplicate history + error_titles = [err.get('title') for err in errors] + self.assertIn('history uniqueness', error_titles) + break + else: + self.fail("History validation group not found") + + def test_history_validation_with_exempt_article_type(self): + """Test that exempt article types don't require received/accepted dates.""" + xml = """ +
+ + + + + 20 + 06 + 2024 + + + + +
+ """ + tree = etree.fromstring(xml) + rules = get_default_rules() + + # Get history validation results + for group_result in validate_xml_content(tree, rules): + if group_result['group'] == 'history': + items = list(group_result['items']) + errors = [item for item in items if item and item.get('response') != 'OK'] + + # Should have no errors (retraction is exempt) + self.assertEqual(len(errors), 0, + f"Expected no errors for exempt article type, but found: {errors}") + break + else: + self.fail("History validation group not found")