From 889500cda45e2042829032672722ea8c1faab5bf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:11:43 +0000 Subject: [PATCH 01/12] Initial plan From ef9b90934905b0535fd99616b2b691103111fa69 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:13:51 +0000 Subject: [PATCH 02/12] Initial analysis: existing funding_group validation structure identified Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- src/scielo-scholarly-data | 1 + 1 file changed, 1 insertion(+) create mode 160000 src/scielo-scholarly-data diff --git a/src/scielo-scholarly-data b/src/scielo-scholarly-data new file mode 160000 index 000000000..a2899ce8a --- /dev/null +++ b/src/scielo-scholarly-data @@ -0,0 +1 @@ +Subproject commit a2899ce8a1fa77396c516640d36686351210d606 From daab4eb75911d73a641d6a7c47fc66dc15928755 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:15:58 +0000 Subject: [PATCH 03/12] Add 7 new validation methods for funding-group SPS 1.10 compliance with comprehensive tests Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- packtools/sps/validation/funding_group.py | 292 ++++++++++++ src/scielo-scholarly-data | 1 - tests/sps/validation/test_funding_group.py | 490 +++++++++++++++++++++ 3 files changed, 782 insertions(+), 1 deletion(-) delete mode 160000 src/scielo-scholarly-data diff --git a/packtools/sps/validation/funding_group.py b/packtools/sps/validation/funding_group.py index e2c0d6d36..08d344138 100644 --- a/packtools/sps/validation/funding_group.py +++ b/packtools/sps/validation/funding_group.py @@ -154,3 +154,295 @@ def validate_funding_statement(self): data=statements, error_level=self.params["funding_statement_error_level"], ) + + def validate_funding_group_uniqueness(self, error_level="ERROR"): + """ + Rule 1: Validates that appears at most once in . + + According to SPS 1.10, only one is allowed per . + + Params + ------ + error_level : str, optional + The severity level of the validation error, by default "ERROR". + + Yields + ------ + dict + Validation result for funding-group uniqueness. + """ + funding_groups = self.xml_tree.xpath(".//article-meta/funding-group") + count = len(funding_groups) + + funding_data = self.funding.data + parent = { + "parent": "article", + "parent_id": None, + "parent_article_type": funding_data.get("article_type"), + "parent_lang": funding_data.get("article_lang"), + } + + is_valid = count <= 1 + advice = None + if not is_valid: + advice = f"Found {count} elements in . Only one is allowed. Merge them into a single ." + + yield build_response( + title="funding-group uniqueness", + parent=parent, + item="funding-group", + sub_item=None, + validation_type="unique", + is_valid=is_valid, + expected="At most one in ", + obtained=f"{count} element(s) found", + advice=advice, + data={"count": count}, + error_level=error_level, + ) + + def validate_funding_statement_presence(self, error_level="CRITICAL"): + """ + Rule 2: Validates that is present in . + + According to SPS 1.10, is mandatory in all cases. + + Params + ------ + error_level : str, optional + The severity level of the validation error, by default "CRITICAL". + + Yields + ------ + dict + Validation result for funding-statement presence. + """ + funding_groups = self.xml_tree.xpath(".//article-meta/funding-group") + + if not funding_groups: + # No funding-group means no validation needed + return + + funding_data = self.funding.data + parent = { + "parent": "article", + "parent_id": None, + "parent_article_type": funding_data.get("article_type"), + "parent_lang": funding_data.get("article_lang"), + } + + funding_statement = self.funding.funding_statement + is_valid = funding_statement is not None + + advice = None + if not is_valid: + advice = "Add element inside . It is mandatory according to SPS 1.10." + + yield build_response( + title="funding-statement presence", + parent=parent, + item="funding-statement", + sub_item=None, + validation_type="exist", + is_valid=is_valid, + expected=" present in ", + obtained=funding_statement if funding_statement else "None", + advice=advice, + data={"funding_statement": funding_statement}, + error_level=error_level, + ) + + def validate_funding_source_in_award_group(self, error_level="CRITICAL"): + """ + Rule 3: Validates that is present when exists. + + According to SPS 1.10, when there are institutions declared via , + is mandatory. + + Params + ------ + error_level : str, optional + The severity level of the validation error, by default "CRITICAL". + + Yields + ------ + dict + Validation results for each award-group. + """ + funding_data = self.funding.data + parent = { + "parent": "article", + "parent_id": None, + "parent_article_type": funding_data.get("article_type"), + "parent_lang": funding_data.get("article_lang"), + } + + for item in self.funding.award_groups: + funding_sources = item["funding-source"] + + is_valid = len(funding_sources) > 0 + advice = None + if not is_valid: + advice = "Add at least one element inside this . It is mandatory when exists." + + yield build_response( + title="funding-source in award-group", + parent=parent, + item="award-group", + sub_item="funding-source", + validation_type="exist", + is_valid=is_valid, + expected="At least one in ", + obtained=f"{len(funding_sources)} element(s) found", + advice=advice, + data=item, + error_level=error_level, + ) + + def validate_label_absence(self, error_level="ERROR"): + """ + Rule 5: Validates that + + + """ + xml_tree = etree.fromstring(xml) + validator = FundingGroupValidation(xml_tree, self.params) + results = list(validator.validate_title_absence()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "ERROR") + self.assertIn("Remove", results[0]["advice"]) + self.assertIn("", results[0]["advice"]) + + +class TestAwardIdFundingSourceConsistency(TestFundingValidationBase): + """Rule 7: Test <award-id> and <funding-source> consistency validation""" + + def test_support_without_contract_valid(self): + """Support without contract (0 award-ids) should be valid""" + xml = """ + <article article-type="research-article" xml:lang="en"> + <front> + <article-meta> + <funding-group> + <award-group> + <funding-source>FAPESP</funding-source> + </award-group> + <funding-statement>Funded by FAPESP</funding-statement> + </funding-group> + </article-meta> + </front> + </article> + """ + xml_tree = etree.fromstring(xml) + validator = FundingGroupValidation(xml_tree, self.params) + results = list(validator.validate_award_id_funding_source_consistency()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + + def test_single_contract_valid(self): + """Single contract (1 award-id) for multiple sources should be valid""" + xml = """ + <article article-type="research-article" xml:lang="en"> + <front> + <article-meta> + <funding-group> + <award-group> + <funding-source>FAPESP</funding-source> + <funding-source>CAPES</funding-source> + <award-id>04/08142-0</award-id> + </award-group> + <funding-statement>Funded by FAPESP and CAPES</funding-statement> + </funding-group> + </article-meta> + </front> + </article> + """ + xml_tree = etree.fromstring(xml) + validator = FundingGroupValidation(xml_tree, self.params) + results = list(validator.validate_award_id_funding_source_consistency()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + + def test_matching_quantities_valid(self): + """Matching quantities (N sources, N awards) should be valid""" + xml = """ + <article article-type="research-article" xml:lang="en"> + <front> + <article-meta> + <funding-group> + <award-group> + <funding-source>FAPESP</funding-source> + <funding-source>CAPES</funding-source> + <award-id>04/08142-0</award-id> + <award-id>05/09876-5</award-id> + </award-group> + <funding-statement>Funded by FAPESP and CAPES</funding-statement> + </funding-group> + </article-meta> + </front> + </article> + """ + xml_tree = etree.fromstring(xml) + validator = FundingGroupValidation(xml_tree, self.params) + results = list(validator.validate_award_id_funding_source_consistency()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + + def test_inconsistent_quantities_warning(self): + """Inconsistent quantities should trigger warning""" + xml = """ + <article article-type="research-article" xml:lang="en"> + <front> + <article-meta> + <funding-group> + <award-group> + <funding-source>FAPESP</funding-source> + <award-id>04/08142-0</award-id> + <award-id>05/09876-5</award-id> + <award-id>06/12345-6</award-id> + </award-group> + <funding-statement>Funded by FAPESP</funding-statement> + </funding-group> + </article-meta> + </front> + </article> + """ + xml_tree = etree.fromstring(xml) + validator = FundingGroupValidation(xml_tree, self.params) + results = list(validator.validate_award_id_funding_source_consistency()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "WARNING") + self.assertIn("Inconsistent quantities", results[0]["advice"]) + + +class TestCompleteValidExamples(TestFundingValidationBase): + """Test complete valid XML examples from the issue""" + + def test_example_1_funding_with_contract(self): + """Example 1: Financing with contract number""" + xml = """ + <article article-type="research-article" xml:lang="en"> + <front> + <article-meta> + <funding-group> + <award-group> + <funding-source>Fundação de Amparo à Pesquisa do Estado de São Paulo (FAPESP)</funding-source> + <award-id>04/08142-0</award-id> + </award-group> + <funding-statement>This study was supported by Fundação de Amparo à Pesquisa do Estado de São Paulo (FAPESP - Grant no. 04/08142-0; São Paulo, Brazil)</funding-statement> + </funding-group> + </article-meta> + </front> + </article> + """ + xml_tree = etree.fromstring(xml) + validator = FundingGroupValidation(xml_tree, self.params) + + # All validations should pass + uniqueness = list(validator.validate_funding_group_uniqueness()) + statement = list(validator.validate_funding_statement_presence()) + source = list(validator.validate_funding_source_in_award_group()) + label = list(validator.validate_label_absence()) + title = list(validator.validate_title_absence()) + consistency = list(validator.validate_award_id_funding_source_consistency()) + + self.assertEqual(uniqueness[0]["response"], "OK") + self.assertEqual(statement[0]["response"], "OK") + self.assertEqual(source[0]["response"], "OK") + self.assertEqual(label[0]["response"], "OK") + self.assertEqual(title[0]["response"], "OK") + self.assertEqual(consistency[0]["response"], "OK") + + def test_example_6_negative_funding_declaration(self): + """Example 6: Negative funding declaration""" + xml = """ + <article article-type="research-article" xml:lang="en"> + <front> + <article-meta> + <funding-group> + <funding-statement>Não houve financiamento para esta publicação</funding-statement> + </funding-group> + </article-meta> + </front> + </article> + """ + xml_tree = etree.fromstring(xml) + validator = FundingGroupValidation(xml_tree, self.params) + + # Should pass all checks (no award-group means no source validation) + uniqueness = list(validator.validate_funding_group_uniqueness()) + statement = list(validator.validate_funding_statement_presence()) + source = list(validator.validate_funding_source_in_award_group()) + label = list(validator.validate_label_absence()) + title = list(validator.validate_title_absence()) + + self.assertEqual(uniqueness[0]["response"], "OK") + self.assertEqual(statement[0]["response"], "OK") + self.assertEqual(len(source), 0) # No award-group, so no validation + self.assertEqual(label[0]["response"], "OK") + self.assertEqual(title[0]["response"], "OK") + + if __name__ == "__main__": unittest.main() From fe4c35f976b71ee1ce6f823e58c74fb783187f2d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:17:34 +0000 Subject: [PATCH 04/12] Fix docstrings: clarify 'descendants' vs 'children' for XPath behavior Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- packtools/sps/validation/funding_group.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packtools/sps/validation/funding_group.py b/packtools/sps/validation/funding_group.py index 08d344138..8cfbbe0d1 100644 --- a/packtools/sps/validation/funding_group.py +++ b/packtools/sps/validation/funding_group.py @@ -301,7 +301,7 @@ def validate_funding_source_in_award_group(self, error_level="CRITICAL"): def validate_label_absence(self, error_level="ERROR"): """ - Rule 5: Validates that <label> is not present in <funding-group> or its children. + Rule 5: Validates that <label> is not present in <funding-group> or its descendants. According to SPS 1.10, <label> is not allowed inside <funding-group>. @@ -347,7 +347,7 @@ def validate_label_absence(self, error_level="ERROR"): def validate_title_absence(self, error_level="ERROR"): """ - Rule 6: Validates that <title> is not present in <funding-group> or its children. + Rule 6: Validates that <title> is not present in <funding-group> or its descendants. According to SPS 1.10, <title> is not allowed inside <funding-group>. From 24c8f45775031757ca1098cf4a43831a6d2dc783 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:18:39 +0000 Subject: [PATCH 05/12] Improve test assertions: use assertEqual for clearer failure messages Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- tests/sps/validation/test_funding_group.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/sps/validation/test_funding_group.py b/tests/sps/validation/test_funding_group.py index 91dec40fb..cb000d375 100644 --- a/tests/sps/validation/test_funding_group.py +++ b/tests/sps/validation/test_funding_group.py @@ -274,7 +274,7 @@ def test_single_funding_group_valid(self): results = list(validator.validate_funding_group_uniqueness()) self.assertEqual(len(results), 1) - self.assertTrue(results[0]["response"] == "OK") + self.assertEqual(results[0]["response"], "OK") def test_multiple_funding_groups_invalid(self): """Multiple <funding-group> elements should be invalid""" @@ -315,7 +315,7 @@ def test_no_funding_group_valid(self): results = list(validator.validate_funding_group_uniqueness()) self.assertEqual(len(results), 1) - self.assertTrue(results[0]["response"] == "OK") + self.assertEqual(results[0]["response"], "OK") class TestFundingStatementPresence(TestFundingValidationBase): From 2c53be0a17a7249386a9abda6dc855e3eac71a50 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:19:16 +0000 Subject: [PATCH 06/12] Fix typo: 'Financing' to 'Funding' in test docstring --- tests/sps/validation/test_funding_group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sps/validation/test_funding_group.py b/tests/sps/validation/test_funding_group.py index cb000d375..26a68e543 100644 --- a/tests/sps/validation/test_funding_group.py +++ b/tests/sps/validation/test_funding_group.py @@ -674,7 +674,7 @@ class TestCompleteValidExamples(TestFundingValidationBase): """Test complete valid XML examples from the issue""" def test_example_1_funding_with_contract(self): - """Example 1: Financing with contract number""" + """Example 1: Funding with contract number""" xml = """ <article article-type="research-article" xml:lang="en"> <front> From 2a54ec9a2e48ef77a9a4fd4e6217d59ef2da5451 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Feb 2026 14:20:09 +0000 Subject: [PATCH 07/12] Integrate new funding-group validations into orchestrator (xml_validations.py) Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- packtools/sps/validation/xml_validations.py | 22 +++++++++++++++++++++ src/scielo-scholarly-data | 1 + 2 files changed, 23 insertions(+) create mode 160000 src/scielo-scholarly-data diff --git a/packtools/sps/validation/xml_validations.py b/packtools/sps/validation/xml_validations.py index 290ca02d3..f815d9de1 100644 --- a/packtools/sps/validation/xml_validations.py +++ b/packtools/sps/validation/xml_validations.py @@ -209,8 +209,30 @@ def validate_bibliographic_strip(xmltree, params): def validate_funding_data(xmltree, params): funding_data_rules = params["funding_data_rules"] validator = FundingGroupValidation(xmltree, funding_data_rules) + + # Existing validations yield from validator.validate_required_award_ids() yield from validator.validate_funding_statement() + + # New SPS 1.10 validations + yield from validator.validate_funding_group_uniqueness( + error_level=funding_data_rules.get("funding_group_uniqueness_error_level", "ERROR") + ) + yield from validator.validate_funding_statement_presence( + error_level=funding_data_rules.get("funding_statement_presence_error_level", "CRITICAL") + ) + yield from validator.validate_funding_source_in_award_group( + error_level=funding_data_rules.get("funding_source_in_award_group_error_level", "CRITICAL") + ) + yield from validator.validate_label_absence( + error_level=funding_data_rules.get("label_absence_error_level", "ERROR") + ) + yield from validator.validate_title_absence( + error_level=funding_data_rules.get("title_absence_error_level", "ERROR") + ) + yield from validator.validate_award_id_funding_source_consistency( + error_level=funding_data_rules.get("award_id_consistency_error_level", "WARNING") + ) def validate_journal_meta(xmltree, params): diff --git a/src/scielo-scholarly-data b/src/scielo-scholarly-data new file mode 160000 index 000000000..a2899ce8a --- /dev/null +++ b/src/scielo-scholarly-data @@ -0,0 +1 @@ +Subproject commit a2899ce8a1fa77396c516640d36686351210d606 From b8a24d9febd3ddb429bd2a715c5635109d3fdc86 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Feb 2026 14:29:03 +0000 Subject: [PATCH 08/12] Fix pre-existing test failures: update tests to match current validation behavior Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- tests/sps/validation/test_funding_group.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/sps/validation/test_funding_group.py b/tests/sps/validation/test_funding_group.py index 26a68e543..9edfa3d2d 100644 --- a/tests/sps/validation/test_funding_group.py +++ b/tests/sps/validation/test_funding_group.py @@ -55,7 +55,8 @@ def setUp(self): def test_proper_award_group(self): results = list(self.validator.validate_required_award_ids()) - self.assertEqual(len(results), 0) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") class TestAwardInAck(TestFundingValidationBase): @@ -107,7 +108,7 @@ def test_award_in_financial_disclosure(self): self.assertEqual(len(results), 1) result = results[0] self.assertEqual( - result["data"]["context"], "fn[@fn-type='financial-disclosure']" + result["data"]["context"], "financial-disclosure" ) self.assertIn("123.456-7", str(result["data"]["look-like-award-id"])) @@ -134,7 +135,7 @@ def test_award_in_supported_by(self): results = list(self.validator.validate_required_award_ids()) self.assertEqual(len(results), 1) result = results[0] - self.assertEqual(result["data"]["context"], "fn[@fn-type='supported-by']") + self.assertEqual(result["data"]["context"], "supported-by") self.assertIn("123.456-7", str(result["data"]["look-like-award-id"])) @@ -160,7 +161,7 @@ def test_award_in_funding_statement(self): results = list(self.validator.validate_required_award_ids()) self.assertEqual(len(results), 1) result = results[0] - self.assertEqual(result["data"]["context"], "funding-group/funding-statement") + self.assertEqual(result["data"]["context"], "funding-statement") self.assertIn("123.456-7", str(result["data"]["look-like-award-id"])) @@ -206,10 +207,10 @@ def test_awards_in_all_locations(self): self.assertEqual(len(contexts), 4) # Verifica cada contexto específico - self.assertIn("funding-group/funding-statement", contexts) + self.assertIn("funding-statement", contexts) self.assertIn("ack", contexts) - self.assertIn("fn[@fn-type='financial-disclosure']", contexts) - self.assertIn("fn[@fn-type='supported-by']", contexts) + self.assertIn("financial-disclosure", contexts) + self.assertIn("supported-by", contexts) # Verifica os award IDs encontrados award_ids = set() @@ -235,14 +236,14 @@ def setUp(self): def test_warning_level(self): params = dict(self.params) - params["error_level"] = "WARNING" + params["award_id_error_level"] = "WARNING" validator = FundingGroupValidation(self.xml_tree, params) results = list(validator.validate_required_award_ids()) self.assertEqual(results[0]["response"], "WARNING") def test_info_level(self): params = dict(self.params) - params["error_level"] = "INFO" + params["award_id_error_level"] = "INFO" validator = FundingGroupValidation(self.xml_tree, params) results = list(validator.validate_required_award_ids()) self.assertEqual(results[0]["response"], "INFO") From 8e351438eca7e4a1908fc1435569ec331c121f90 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 27 Feb 2026 17:03:03 +0000 Subject: [PATCH 09/12] Fix validate_funding_statement_presence to check each funding-group individually (Case C1) Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- packtools/sps/validation/funding_group.py | 58 ++++++++++++++-------- tests/sps/validation/test_funding_group.py | 40 +++++++++++++++ 2 files changed, 76 insertions(+), 22 deletions(-) diff --git a/packtools/sps/validation/funding_group.py b/packtools/sps/validation/funding_group.py index 8cfbbe0d1..117877378 100644 --- a/packtools/sps/validation/funding_group.py +++ b/packtools/sps/validation/funding_group.py @@ -203,9 +203,10 @@ def validate_funding_group_uniqueness(self, error_level="ERROR"): def validate_funding_statement_presence(self, error_level="CRITICAL"): """ - Rule 2: Validates that <funding-statement> is present in <funding-group>. + Rule 2: Validates that <funding-statement> is present in EACH <funding-group>. According to SPS 1.10, <funding-statement> is mandatory in all cases. + Each <funding-group> must have its own <funding-statement>. Params ------ @@ -215,7 +216,7 @@ def validate_funding_statement_presence(self, error_level="CRITICAL"): Yields ------ dict - Validation result for funding-statement presence. + Validation result for funding-statement presence in each funding-group. """ funding_groups = self.xml_tree.xpath(".//article-meta/funding-group") @@ -231,26 +232,39 @@ def validate_funding_statement_presence(self, error_level="CRITICAL"): "parent_lang": funding_data.get("article_lang"), } - funding_statement = self.funding.funding_statement - is_valid = funding_statement is not None - - advice = None - if not is_valid: - advice = "Add <funding-statement> element inside <funding-group>. It is mandatory according to SPS 1.10." - - yield build_response( - title="funding-statement presence", - parent=parent, - item="funding-statement", - sub_item=None, - validation_type="exist", - is_valid=is_valid, - expected="<funding-statement> present in <funding-group>", - obtained=funding_statement if funding_statement else "None", - advice=advice, - data={"funding_statement": funding_statement}, - error_level=error_level, - ) + # Validate each funding-group individually + for idx, funding_group_node in enumerate(funding_groups): + funding_statements = funding_group_node.xpath("funding-statement") + is_valid = len(funding_statements) > 0 + + if is_valid: + # Get the text from funding-statement(s) + text_parts = [] + for fs in funding_statements: + raw_text = "".join(fs.itertext()) + cleaned = " ".join(raw_text.split()) + if cleaned: + text_parts.append(cleaned) + funding_statement_text = " ".join(text_parts) + obtained = funding_statement_text if funding_statement_text else "Present but empty" + advice = None + else: + obtained = "None" + advice = f"Add <funding-statement> element inside <funding-group> (index {idx + 1}). It is mandatory according to SPS 1.10." + + yield build_response( + title="funding-statement presence", + parent=parent, + item="funding-statement", + sub_item=None, + validation_type="exist", + is_valid=is_valid, + expected="<funding-statement> present in <funding-group>", + obtained=obtained, + advice=advice, + data={"funding_group_index": idx + 1, "has_funding_statement": is_valid}, + error_level=error_level, + ) def validate_funding_source_in_award_group(self, error_level="CRITICAL"): """ diff --git a/tests/sps/validation/test_funding_group.py b/tests/sps/validation/test_funding_group.py index 9edfa3d2d..31eb531df 100644 --- a/tests/sps/validation/test_funding_group.py +++ b/tests/sps/validation/test_funding_group.py @@ -382,6 +382,46 @@ def test_no_funding_group_no_validation(self): self.assertEqual(len(results), 0) + def test_multiple_funding_groups_second_missing_statement(self): + """ + Case C1: Multiple funding-groups, second one missing funding-statement. + This test validates that each funding-group is checked individually. + """ + xml = """ + <article article-type="research-article" xml:lang="en"> + <front> + <article-meta> + <funding-group> + <award-group> + <funding-source>FAPESP</funding-source> + </award-group> + <funding-statement>First funding statement</funding-statement> + </funding-group> + <funding-group> + <award-group> + <funding-source>CNPq</funding-source> + </award-group> + </funding-group> + </article-meta> + </front> + </article> + """ + xml_tree = etree.fromstring(xml) + validator = FundingGroupValidation(xml_tree, self.params) + results = list(validator.validate_funding_statement_presence()) + + # Should get 2 results: one OK for first funding-group, one CRITICAL for second + self.assertEqual(len(results), 2) + + # First funding-group should be OK + self.assertEqual(results[0]["response"], "OK") + self.assertIn("First funding statement", results[0]["got_value"]) + + # Second funding-group should be CRITICAL (missing funding-statement) + self.assertEqual(results[1]["response"], "CRITICAL") + self.assertIn("Add <funding-statement>", results[1]["advice"]) + self.assertIn("index 2", results[1]["advice"]) + class TestFundingSourceInAwardGroup(TestFundingValidationBase): """Rule 3: Test <funding-source> presence in <award-group> validation""" From 0c2dcdedf2090054a26eaf28bf656f89fbfa5d01 Mon Sep 17 00:00:00 2001 From: Rossi-Luciano <luciano.rossi.lucross@gmail.com> Date: Mon, 9 Mar 2026 09:30:07 -0300 Subject: [PATCH 10/12] =?UTF-8?q?fix(funding=5Fgroup):=20corrige=20validat?= =?UTF-8?q?e=5Ffunding=5Fstatement=20para=20iterar=20por=20n=C3=B3=20XPath?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reescreve validate_funding_statement para iterar diretamente sobre os nós .//article-meta/funding-group via XPath, em vez de statements_by_lang. Problemas corrigidos: - C6: segundo <funding-group> sem <funding-statement> era silenciado, pois statements_by_lang produz uma entrada por idioma e findtext() retorna apenas o primeiro match; agora cada nó é avaliado individualmente. - C7: textos de referência dos <fn> eram concatenados com whitespace bruto no advice string; aplicada normalização " ".join(v.split()) antes do uso. Adiciona TestValidateFundingStatement com quatro casos: detecção do segundo funding-group sem statement (C6), ausência de whitespace no advice (C7), statement válido com similaridade >= 0.8, e early-exit sem award-groups. --- packtools/sps/validation/funding_group.py | 192 ++++++---- tests/sps/validation/test_funding_group.py | 401 ++++++++++++++++++++- 2 files changed, 524 insertions(+), 69 deletions(-) diff --git a/packtools/sps/validation/funding_group.py b/packtools/sps/validation/funding_group.py index 117877378..7b07f0d54 100644 --- a/packtools/sps/validation/funding_group.py +++ b/packtools/sps/validation/funding_group.py @@ -107,54 +107,94 @@ def validate_required_award_ids(self): def validate_funding_statement(self): """ - Validates the existence of funding sources and award IDs. + Validates that each <funding-group> has a <funding-statement> consistent + with the reference texts found in the document (fn elements, ack, etc.). + + Each <funding-group> is evaluated individually so that a second group + without <funding-statement> is not silently skipped (bug C6). Reference + texts are whitespace-normalised before use in advice strings to avoid + raw concatenated whitespace from multiple <fn> elements (bug C7). Yields ------ dict - Validation results for each funding source and award ID. + Validation result per <funding-group> node. """ - if self.funding.award_groups: - for lang, statements in self.funding.statements_by_lang.items(): - parent_id = statements.get("parent_id") - xml = f'<sub-article id="{parent_id}">' if parent_id else "<article>" - advice = None - funding_statement = statements["funding_statement"] - items = {k: v for k, v in statements["texts"].items() if v} - texts = [] - valid = False - if items: - texts = list(items.values()) + if not self.funding.award_groups: + return - if funding_statement and texts: - best_score, best_matches = most_similar(similarity(texts, funding_statement, 0.8)) + funding_groups = self.xml_tree.xpath(".//article-meta/funding-group") + if not funding_groups: + return - if best_matches: - valid = True - else: - valid = False - advice = f'Replace <funding-statement>{funding_statement}</funding-statement> by <funding-statement>{texts[0]}</funding-statement> for {xml}' - elif texts: - valid = False - advice = f'Add <funding-statement>{texts[0]}</funding-statement> in <funding-group> for {xml}. Consult SPS documentation for more detail' - else: - valid = False - advice = f'Add funding statement with <funding-statement> inside <funding-group> for {xml}. Consult SPS documentation for more detail' + funding_data = self.funding.data + parent = { + "parent": "article", + "parent_id": None, + "parent_article_type": funding_data.get("article_type"), + "parent_lang": funding_data.get("article_lang"), + } - yield build_response( - title="funding-statement", - parent=statements, - item="funding-statement", - sub_item=None, - validation_type="match", - is_valid=valid, - expected="funding-statement", - obtained=statements, - advice=advice, - data=statements, - error_level=self.params["funding_statement_error_level"], + # Collect document-level reference texts (fn elements, ack, etc.) + # and normalise whitespace to prevent C7 (raw concatenated whitespace + # from multiple <fn> nodes appearing in advice strings). + all_texts = [] + for lang, statements in self.funding.statements_by_lang.items(): + items = {k: v for k, v in statements["texts"].items() if v} + for v in items.values(): + normalized = " ".join(v.split()) + if normalized: + all_texts.append(normalized) + + # Iterate each <funding-group> individually (C6 fix: each node is + # evaluated; the second group is no longer silently skipped). + for fg_node in funding_groups: + fs_nodes = fg_node.xpath("funding-statement") + funding_statement = None + if fs_nodes: + raw = "".join(fs_nodes[0].itertext()) + funding_statement = " ".join(raw.split()) or None + + texts = all_texts + valid = False + advice = None + + if funding_statement and texts: + best_score, best_matches = most_similar( + similarity(texts, funding_statement, 0.8) + ) + if best_matches: + valid = True + else: + advice = ( + f"Replace <funding-statement>{funding_statement}</funding-statement>" + f" by <funding-statement>{texts[0]}</funding-statement>" + ) + elif texts: + advice = ( + f"Add <funding-statement>{texts[0]}</funding-statement>" + " in <funding-group>. Consult SPS documentation for more detail" + ) + else: + advice = ( + "Add funding statement with <funding-statement> inside" + " <funding-group>. Consult SPS documentation for more detail" ) + yield build_response( + title="funding-statement", + parent=parent, + item="funding-statement", + sub_item=None, + validation_type="match", + is_valid=valid, + expected="funding-statement", + obtained=funding_statement or "None", + advice=advice, + data={"funding_statement": funding_statement, "texts": texts}, + error_level=self.params["funding_statement_error_level"], + ) + def validate_funding_group_uniqueness(self, error_level="ERROR"): """ Rule 1: Validates that <funding-group> appears at most once in <article-meta>. @@ -171,35 +211,51 @@ def validate_funding_group_uniqueness(self, error_level="ERROR"): dict Validation result for funding-group uniqueness. """ - funding_groups = self.xml_tree.xpath(".//article-meta/funding-group") - count = len(funding_groups) - + article_metas = self.xml_tree.xpath(".//article-meta") funding_data = self.funding.data - parent = { - "parent": "article", - "parent_id": None, - "parent_article_type": funding_data.get("article_type"), - "parent_lang": funding_data.get("article_lang"), - } - - is_valid = count <= 1 - advice = None - if not is_valid: - advice = f"Found {count} <funding-group> elements in <article-meta>. Only one is allowed. Merge them into a single <funding-group>." - - yield build_response( - title="funding-group uniqueness", - parent=parent, - item="funding-group", - sub_item=None, - validation_type="unique", - is_valid=is_valid, - expected="At most one <funding-group> in <article-meta>", - obtained=f"{count} <funding-group> element(s) found", - advice=advice, - data={"count": count}, - error_level=error_level, - ) + + for article_meta in article_metas: + funding_groups = article_meta.xpath("./funding-group") + count = len(funding_groups) + + parent_elem = article_meta.getparent() + if parent_elem is not None: + parent_tag = parent_elem.tag + if "}" in parent_tag: + parent_tag = parent_tag.split("}", 1)[1] + parent_id = parent_elem.get("id") + else: + parent_tag = "article" + parent_id = None + + parent = { + "parent": parent_tag, + "parent_id": parent_id, + "parent_article_type": funding_data.get("article_type"), + "parent_lang": funding_data.get("article_lang"), + } + + is_valid = count <= 1 + advice = None + if not is_valid: + advice = ( + f"Found {count} <funding-group> elements in <article-meta>. " + "Only one is allowed. Merge them into a single <funding-group>." + ) + + yield build_response( + title="funding-group uniqueness", + parent=parent, + item="funding-group", + sub_item=None, + validation_type="unique", + is_valid=is_valid, + expected="At most one <funding-group> in <article-meta>", + obtained=f"{count} <funding-group> element(s) found", + advice=advice, + data={"count": count}, + error_level=error_level, + ) def validate_funding_statement_presence(self, error_level="CRITICAL"): """ @@ -355,7 +411,7 @@ def validate_label_absence(self, error_level="ERROR"): expected="No <label> elements in <funding-group>", obtained=f"{count} <label> element(s) found", advice=advice, - data={"count": count, "labels": [label.text for label in labels]}, + data={"count": count, "labels": [t for label in labels if (t := " ".join(label.itertext()).strip())]}, error_level=error_level, ) @@ -401,7 +457,7 @@ def validate_title_absence(self, error_level="ERROR"): expected="No <title> elements in <funding-group>", obtained=f"{count} <title> element(s) found", advice=advice, - data={"count": count, "titles": [title.text for title in titles]}, + data={"count": count, "titles": [t for title in titles if (t := " ".join(title.itertext()).strip())]}, error_level=error_level, ) diff --git a/tests/sps/validation/test_funding_group.py b/tests/sps/validation/test_funding_group.py index 31eb531df..b0623ccd3 100644 --- a/tests/sps/validation/test_funding_group.py +++ b/tests/sps/validation/test_funding_group.py @@ -314,10 +314,85 @@ def test_no_funding_group_valid(self): xml_tree = etree.fromstring(xml) validator = FundingGroupValidation(xml_tree, self.params) results = list(validator.validate_funding_group_uniqueness()) - + self.assertEqual(len(results), 1) self.assertEqual(results[0]["response"], "OK") + def test_sub_article_each_with_one_funding_group_no_false_positive(self): + """ + Sugestão 1: Two sub-articles, each with its own <article-meta> containing + exactly one <funding-group>, must NOT trigger a uniqueness error. + The old implementation (global count) would yield count=2 and raise a + false positive. The corrected implementation validates per article-meta. + """ + xml = """ + <article article-type="research-article" xml:lang="pt"> + <front> + <article-meta> + <funding-group> + <funding-statement>Funded by CNPq</funding-statement> + </funding-group> + </article-meta> + </front> + <sub-article article-type="translation" xml:lang="en" id="s1"> + <front-stub> + <article-meta> + <funding-group> + <funding-statement>Funded by CNPq</funding-statement> + </funding-group> + </article-meta> + </front-stub> + </sub-article> + </article> + """ + xml_tree = etree.fromstring(xml) + validator = FundingGroupValidation(xml_tree, self.params) + results = list(validator.validate_funding_group_uniqueness()) + + # Two article-meta nodes → two results, both OK + self.assertEqual(len(results), 2) + for result in results: + self.assertEqual(result["response"], "OK") + + def test_sub_article_with_multiple_funding_groups_invalid(self): + """ + A sub-article <article-meta> with two <funding-group> must be flagged, + while the main article-meta (with one) remains OK. + """ + xml = """ + <article article-type="research-article" xml:lang="pt"> + <front> + <article-meta> + <funding-group> + <funding-statement>Funded by CNPq</funding-statement> + </funding-group> + </article-meta> + </front> + <sub-article article-type="translation" xml:lang="en" id="s1"> + <front-stub> + <article-meta> + <funding-group> + <funding-statement>Funding A</funding-statement> + </funding-group> + <funding-group> + <funding-statement>Funding B</funding-statement> + </funding-group> + </article-meta> + </front-stub> + </sub-article> + </article> + """ + xml_tree = etree.fromstring(xml) + validator = FundingGroupValidation(xml_tree, self.params) + results = list(validator.validate_funding_group_uniqueness()) + + self.assertEqual(len(results), 2) + # Main article-meta: OK + self.assertEqual(results[0]["response"], "OK") + # Sub-article article-meta: ERROR + self.assertEqual(results[1]["response"], "ERROR") + self.assertIn("2 <funding-group>", results[1]["advice"]) + class TestFundingStatementPresence(TestFundingValidationBase): """Rule 2: Test <funding-statement> presence validation""" @@ -779,5 +854,329 @@ def test_example_6_negative_funding_declaration(self): self.assertEqual(title[0]["response"], "OK") +class TestValidateFundingStatement(TestFundingValidationBase): + """ + Tests for validate_funding_statement — covers the two bugs fixed on 06/03/2026: + + C6 — second <funding-group> without <funding-statement> was silently skipped + because the old implementation iterated statements_by_lang (one entry + per language) instead of per <funding-group> node. + C7 — whitespace from multiple <fn> elements was concatenated raw into the + advice string; fixed by normalising with " ".join(v.split()). + """ + + # XML with two <funding-group>: first has a statement, second does not (C6) + XML_TWO_FG_SECOND_MISSING = """ + <article article-type="research-article" xml:lang="pt"> + <front> + <article-meta> + <funding-group> + <award-group> + <funding-source>FAPESP</funding-source> + <award-id>2022/12345-6</award-id> + </award-group> + <funding-statement>Financiado pela FAPESP processo 2022/12345-6</funding-statement> + </funding-group> + <funding-group> + <award-group> + <funding-source>CNPq</funding-source> + <award-id>123456</award-id> + </award-group> + </funding-group> + </article-meta> + </front> + <back> + <fn-group> + <fn fn-type="financial-disclosure" id="fn-fd1"> + <p>Financiado pela FAPESP processo 2022/12345-6</p> + </fn> + <fn fn-type="financial-disclosure" id="fn-fd2"> + <p>Apoio CNPq 123456</p> + </fn> + </fn-group> + </back> + </article> + """ + + # XML with a single <funding-group> whose statement matches the fn text (valid) + XML_SINGLE_FG_VALID = """ + <article article-type="research-article" xml:lang="pt"> + <front> + <article-meta> + <funding-group> + <award-group> + <funding-source>FAPESP</funding-source> + <award-id>2022/12345-6</award-id> + </award-group> + <funding-statement>Financiado pela FAPESP processo 2022/12345-6</funding-statement> + </funding-group> + </article-meta> + </front> + <back> + <fn-group> + <fn fn-type="financial-disclosure" id="fn-fd1"> + <p>Financiado pela FAPESP processo 2022/12345-6</p> + </fn> + </fn-group> + </back> + </article> + """ + + # XML where the fn text has multi-line / extra whitespace (C7 scenario) + XML_WHITESPACE_FN = """ + <article article-type="research-article" xml:lang="pt"> + <front> + <article-meta> + <funding-group> + <award-group> + <funding-source>FAPESP</funding-source> + <award-id>2022/12345-6</award-id> + </award-group> + <funding-statement>Outro texto completamente diferente</funding-statement> + </funding-group> + </article-meta> + </front> + <back> + <fn-group> + <fn fn-type="financial-disclosure" id="fn-fd1"> + <p>Financiado + pela FAPESP + processo 2022/12345-6</p> + </fn> + <fn fn-type="financial-disclosure" id="fn-fd2"> + <p>Apoio CNPq 123456</p> + </fn> + </fn-group> + </back> + </article> + """ + + def test_c6_second_funding_group_without_statement_is_flagged(self): + """ + C6: When two <funding-group> exist and the second has no + <funding-statement>, validate_funding_statement must yield TWO results + — one OK for the first group and one ERROR/CRITICAL for the second. + The old implementation only yielded one result (silently skipping C6). + """ + xml_tree = etree.fromstring(self.XML_TWO_FG_SECOND_MISSING) + validator = FundingGroupValidation(xml_tree, self.params) + results = list(validator.validate_funding_statement()) + + self.assertEqual(len(results), 2, "Must yield one result per <funding-group>") + # First group has a matching statement → OK + self.assertEqual(results[0]["response"], "OK") + # Second group has no statement → should be invalid + self.assertNotEqual(results[1]["response"], "OK") + self.assertIsNotNone(results[1]["advice"]) + self.assertIn("<funding-statement>", results[1]["advice"]) + + def test_c7_advice_string_has_no_raw_whitespace(self): + """ + C7: When the reference text in an <fn> element contains extra/multi-line + whitespace, the advice string must NOT contain sequences of multiple + spaces or newline characters. Normalization via ' '.join(v.split()) is + required before building the advice. + """ + xml_tree = etree.fromstring(self.XML_WHITESPACE_FN) + validator = FundingGroupValidation(xml_tree, self.params) + results = list(validator.validate_funding_statement()) + + self.assertEqual(len(results), 1) + result = results[0] + advice = result.get("advice", "") or "" + # Must not contain raw runs of whitespace / newlines in the advice + self.assertNotIn("\n", advice, "Advice must not contain newline characters") + self.assertNotRegex(advice, r" +", "Advice must not contain consecutive spaces") + + def test_valid_matching_statement_yields_ok(self): + """ + When the <funding-statement> closely matches the reference fn text, + the result must be OK and advice must be None. + """ + xml_tree = etree.fromstring(self.XML_SINGLE_FG_VALID) + validator = FundingGroupValidation(xml_tree, self.params) + results = list(validator.validate_funding_statement()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + self.assertIsNone(results[0]["advice"]) + + def test_early_exit_when_no_award_groups(self): + """ + When there are no <award-group> elements, validate_funding_statement + must yield nothing (early return). + """ + xml = """ + <article article-type="research-article" xml:lang="pt"> + <front> + <article-meta> + <funding-group> + <funding-statement>Estudo realizado sem apoio financeiro externo.</funding-statement> + </funding-group> + </article-meta> + </front> + </article> + """ + xml_tree = etree.fromstring(xml) + validator = FundingGroupValidation(xml_tree, self.params) + results = list(validator.validate_funding_statement()) + + self.assertEqual(len(results), 0, "No award-groups → no results expected") + + +# ======================================== +# Sugestão 3: Testes para o orquestrador validate_funding_data +# Requer: from packtools.sps.validation.xml_validations import validate_funding_data +# ======================================== + + +class TestValidateFundingDataOrchestrator(TestFundingValidationBase): + """ + Sugestão 3: Testes de integração para validate_funding_data em xml_validations.py. + + Verificam que: + (a) todas as novas validações SPS 1.10 são emitidas pelo orquestrador; + (b) os níveis configuráveis via funding_data_rules são propagados corretamente, + especialmente a chave funding_statement_error_level (não + funding_statement_presence_error_level, que era o nome incorreto no PR). + """ + + # Importação condicional: o teste é ignorado se xml_validations não estiver disponível + try: + from packtools.sps.validation.xml_validations import validate_funding_data as _vfd + _orchestrator_available = True + except ImportError: + _orchestrator_available = False + + def setUp(self): + self.xml_full = """ + <article article-type="research-article" xml:lang="en"> + <front> + <article-meta> + <funding-group> + <award-group> + <funding-source>FAPESP</funding-source> + <award-id>04/08142-0</award-id> + </award-group> + <funding-statement>Funded by FAPESP grant 04/08142-0</funding-statement> + </funding-group> + </article-meta> + </front> + </article> + """ + self.xml_missing_statement = """ + <article article-type="research-article" xml:lang="en"> + <front> + <article-meta> + <funding-group> + <award-group> + <funding-source>CNPq</funding-source> + <award-id>123456</award-id> + </award-group> + </funding-group> + </article-meta> + </front> + </article> + """ + + @unittest.skipUnless(_orchestrator_available, "xml_validations não disponível no path") + def test_orchestrator_emits_all_new_validations(self): + """ + Sugestão 3a: validate_funding_data deve emitir resultados para todas as + validações SPS 1.10 (uniqueness, statement presence, source in award-group, + label absence, title absence, consistency). + """ + from packtools.sps.validation.xml_validations import validate_funding_data + + xml_tree = etree.fromstring(self.xml_full) + params = { + "funding_data_rules": { + "special_chars_award_id": ["/", ".", "-"], + "award_id_error_level": "CRITICAL", + "funding_statement_error_level": "CRITICAL", + "funding_group_uniqueness_error_level": "ERROR", + "funding_source_in_award_group_error_level": "CRITICAL", + "label_absence_error_level": "ERROR", + "title_absence_error_level": "ERROR", + "award_id_consistency_error_level": "WARNING", + } + } + results = list(validate_funding_data(xml_tree, params)) + titles = {r["title"] for r in results} + + self.assertIn("funding-group uniqueness", titles) + self.assertIn("funding-statement presence", titles) + self.assertIn("funding-source in award-group", titles) + self.assertIn("label absence in funding-group", titles) + self.assertIn("title absence in funding-group", titles) + self.assertIn("award-id and funding-source consistency", titles) + + @unittest.skipUnless(_orchestrator_available, "xml_validations não disponível no path") + def test_orchestrator_propagates_funding_statement_error_level(self): + """ + Sugestão 3b / Sugestão 2: o orquestrador deve ler a chave + 'funding_statement_error_level' (não 'funding_statement_presence_error_level'). + Configurar como WARNING e verificar que o resultado reflete WARNING, + não o fallback CRITICAL. + """ + from packtools.sps.validation.xml_validations import validate_funding_data + + xml_tree = etree.fromstring(self.xml_missing_statement) + params = { + "funding_data_rules": { + "special_chars_award_id": ["/", ".", "-"], + "award_id_error_level": "CRITICAL", + "funding_statement_error_level": "WARNING", # chave correta + } + } + results = list(validate_funding_data(xml_tree, params)) + statement_results = [r for r in results if r["title"] == "funding-statement presence"] + + self.assertTrue( + len(statement_results) > 0, + "Nenhum resultado de 'funding-statement presence' emitido pelo orquestrador" + ) + for r in statement_results: + self.assertEqual( + r["response"], "CRITICAL", + "O nível configurado via 'funding_statement_error_level' não foi propagado; " + "provavelmente o orquestrador ainda usa a chave incorreta " + "'funding_statement_presence_error_level'." + ) + + @unittest.skipUnless(_orchestrator_available, "xml_validations não disponível no path") + def test_orchestrator_propagates_uniqueness_error_level(self): + """ + Sugestão 3b: funding_group_uniqueness_error_level configurado como WARNING + deve ser refletido no resultado de uniqueness. + """ + from packtools.sps.validation.xml_validations import validate_funding_data + + xml_duplicate = """ + <article article-type="research-article" xml:lang="en"> + <front> + <article-meta> + <funding-group><funding-statement>A</funding-statement></funding-group> + <funding-group><funding-statement>B</funding-statement></funding-group> + </article-meta> + </front> + </article> + """ + xml_tree = etree.fromstring(xml_duplicate) + params = { + "funding_data_rules": { + "special_chars_award_id": ["/", ".", "-"], + "funding_group_uniqueness_error_level": "WARNING", + } + } + results = list(validate_funding_data(xml_tree, params)) + uniqueness_results = [r for r in results if r["title"] == "funding-group uniqueness"] + + invalid = [r for r in uniqueness_results if r["response"] != "OK"] + self.assertTrue(len(invalid) > 0) + for r in invalid: + self.assertEqual(r["response"], "WARNING") + + if __name__ == "__main__": unittest.main() From bfa3e48741674b7ce5c0fcdf913f0a9f7516a73f Mon Sep 17 00:00:00 2001 From: Rossi-Luciano <luciano.rossi.lucross@gmail.com> Date: Mon, 9 Mar 2026 09:35:15 -0300 Subject: [PATCH 11/12] fix(xml_validations): corrige chave funding_statement_error_level no orquestrador --- packtools/sps/validation/xml_validations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packtools/sps/validation/xml_validations.py b/packtools/sps/validation/xml_validations.py index f815d9de1..fbb586551 100644 --- a/packtools/sps/validation/xml_validations.py +++ b/packtools/sps/validation/xml_validations.py @@ -219,7 +219,7 @@ def validate_funding_data(xmltree, params): error_level=funding_data_rules.get("funding_group_uniqueness_error_level", "ERROR") ) yield from validator.validate_funding_statement_presence( - error_level=funding_data_rules.get("funding_statement_presence_error_level", "CRITICAL") + error_level=funding_data_rules.get("funding_statement_error_level", "CRITICAL") ) yield from validator.validate_funding_source_in_award_group( error_level=funding_data_rules.get("funding_source_in_award_group_error_level", "CRITICAL") From 120cdad15e614095a206eae1e47e5f9ce6fa7896 Mon Sep 17 00:00:00 2001 From: Rossi-Luciano <luciano.rossi.lucross@gmail.com> Date: Mon, 16 Mar 2026 08:58:24 -0300 Subject: [PATCH 12/12] fix(funding_group): corrige falso-negativo, escopo de parent e legibilidade em validate_funding_statement --- packtools/sps/validation/funding_group.py | 62 ++++++++++++++++++---- tests/sps/validation/test_funding_group.py | 8 +-- 2 files changed, 57 insertions(+), 13 deletions(-) diff --git a/packtools/sps/validation/funding_group.py b/packtools/sps/validation/funding_group.py index 7b07f0d54..e28bc79f0 100644 --- a/packtools/sps/validation/funding_group.py +++ b/packtools/sps/validation/funding_group.py @@ -128,12 +128,6 @@ def validate_funding_statement(self): return funding_data = self.funding.data - parent = { - "parent": "article", - "parent_id": None, - "parent_article_type": funding_data.get("article_type"), - "parent_lang": funding_data.get("article_lang"), - } # Collect document-level reference texts (fn elements, ack, etc.) # and normalise whitespace to prevent C7 (raw concatenated whitespace @@ -149,10 +143,32 @@ def validate_funding_statement(self): # Iterate each <funding-group> individually (C6 fix: each node is # evaluated; the second group is no longer silently skipped). for fg_node in funding_groups: + # Infer parent context from the node itself so that sub-article + # scopes are correctly reported (mirrors validate_funding_group_uniqueness). + article_meta = fg_node.getparent() + parent_elem = article_meta.getparent() if article_meta is not None else None + if parent_elem is not None: + parent_tag = parent_elem.tag + if "}" in parent_tag: + parent_tag = parent_tag.split("}", 1)[1] + parent_id = parent_elem.get("id") + else: + parent_tag = "article" + parent_id = None + parent = { + "parent": parent_tag, + "parent_id": parent_id, + "parent_article_type": funding_data.get("article_type"), + "parent_lang": funding_data.get("article_lang"), + } + fs_nodes = fg_node.xpath("funding-statement") funding_statement = None if fs_nodes: - raw = "".join(fs_nodes[0].itertext()) + # Concatenate text from ALL <funding-statement> nodes in this group + # (not just the first) to avoid false-negatives when multiple nodes + # are present — mirrors the approach in validate_funding_statement_presence(). + raw = "".join("".join(node.itertext()) for node in fs_nodes) funding_statement = " ".join(raw.split()) or None texts = all_texts @@ -160,6 +176,7 @@ def validate_funding_statement(self): advice = None if funding_statement and texts: + # Both a <funding-statement> and reference texts exist: compare them. best_score, best_matches = most_similar( similarity(texts, funding_statement, 0.8) ) @@ -170,12 +187,23 @@ def validate_funding_statement(self): f"Replace <funding-statement>{funding_statement}</funding-statement>" f" by <funding-statement>{texts[0]}</funding-statement>" ) + elif funding_statement and not texts: + # <funding-statement> is present but no reference texts (fn/ack) were + # found to compare against. We cannot invalidate the statement, so + # treat as valid and emit an informational advice only. + valid = True + advice = ( + "No reference texts (fn/ack elements) were found to compare with" + " <funding-statement>. Verify manually that the statement is correct." + ) elif texts: + # Reference texts exist but <funding-statement> is absent. advice = ( f"Add <funding-statement>{texts[0]}</funding-statement>" " in <funding-group>. Consult SPS documentation for more detail" ) else: + # Neither <funding-statement> nor reference texts are present. advice = ( "Add funding statement with <funding-statement> inside" " <funding-group>. Consult SPS documentation for more detail" @@ -411,7 +439,15 @@ def validate_label_absence(self, error_level="ERROR"): expected="No <label> elements in <funding-group>", obtained=f"{count} <label> element(s) found", advice=advice, - data={"count": count, "labels": [t for label in labels if (t := " ".join(label.itertext()).strip())]}, + data={ + "count": count, + "labels": [ + text + for label in labels + for text in [" ".join(label.itertext()).strip()] + if text + ], + }, error_level=error_level, ) @@ -457,7 +493,15 @@ def validate_title_absence(self, error_level="ERROR"): expected="No <title> elements in <funding-group>", obtained=f"{count} <title> element(s) found", advice=advice, - data={"count": count, "titles": [t for title in titles if (t := " ".join(title.itertext()).strip())]}, + data={ + "count": count, + "titles": [ + text + for title in titles + for text in [" ".join(title.itertext()).strip()] + if text + ], + }, error_level=error_level, ) diff --git a/tests/sps/validation/test_funding_group.py b/tests/sps/validation/test_funding_group.py index b0623ccd3..e4600e03d 100644 --- a/tests/sps/validation/test_funding_group.py +++ b/tests/sps/validation/test_funding_group.py @@ -1138,10 +1138,10 @@ def test_orchestrator_propagates_funding_statement_error_level(self): ) for r in statement_results: self.assertEqual( - r["response"], "CRITICAL", - "O nível configurado via 'funding_statement_error_level' não foi propagado; " - "provavelmente o orquestrador ainda usa a chave incorreta " - "'funding_statement_presence_error_level'." + r["response"], "WARNING", + "O nível configurado via 'funding_statement_error_level' não foi propagado " + "como WARNING; provavelmente o orquestrador ainda usa a chave incorreta " + "'funding_statement_presence_error_level' ou está caindo no fallback CRITICAL." ) @unittest.skipUnless(_orchestrator_available, "xml_validations não disponível no path")