Skip to content
251 changes: 249 additions & 2 deletions packtools/sps/validation/front_articlemeta_issue.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from packtools.sps.models.front_articlemeta_issue import ArticleMetaIssue
from packtools.sps.validation.utils import build_response
import re


def is_valid_value(value, zero_is_allowed):
Expand Down Expand Up @@ -184,7 +185,7 @@ def validate_issue_format(self):
expected=expected,
obtained=parsed_issue,
advice=(
f"""Replace {self.article_issue} in <article-meta><issue>{self.article_issue}</issue> by with one of {expected}"""
f"Replace {self.article_issue.issue} in <article-meta><issue> with one of {expected}"
if not got_valid_format
else None
),
Expand Down Expand Up @@ -242,6 +243,244 @@ def validate_expected_issues(self):
error_level=self.params["expected_issues_error_level"],
)

def validate_issue_element_uniqueness(self):
    """
    Check that <article-meta> holds no more than one <issue> element.

    Every ``front/article-meta/issue`` node found in ``self.xml_tree`` is
    collected; the check passes when zero or one node is present
    (SPS 1.10 allows a single <issue> element).

    Returns:
        The value produced by ``build_response`` for this check. Its
        ``data`` argument carries the number of <issue> nodes and the
        text of each of them.
    """
    found = self.xml_tree.findall(".//front/article-meta/issue")
    total = len(found)

    # Keep the raw text of every occurrence so the report shows what to remove.
    texts = []
    for node in found:
        texts.append(node.text)

    return build_response(
        title="issue element uniqueness",
        parent={"parent": "article"},
        item="issue",
        sub_item=None,
        validation_type="unique",
        is_valid=total < 2,
        expected="at most one <issue> element in <article-meta>",
        obtained=f"{total} <issue> element(s) found",
        advice=f"Remove duplicate <issue> elements from <article-meta>. Found {total} elements, expected at most 1.",
        data={"issue_count": total, "issue_values": texts},
        error_level=self.params.get("issue_element_uniqueness_error_level", "ERROR"),
    )

def validate_issue_no_punctuation(self):
    """
    Check that the <issue> value contains none of a fixed set of
    punctuation marks.

    The marks looked for are: . , - / : ; ! ? ( ) [ ] { } " '
    (SPS 1.10 does not allow punctuation in <issue>).

    Returns:
        None when the issue value is empty or missing; otherwise the
        value produced by ``build_response``, whose ``data`` argument
        lists the marks that were found.
    """
    issue_value = self.article_issue.issue
    if not issue_value:
        return None

    candidate_marks = (
        '.', ',', '-', '/', ':', ';', '!', '?',
        '(', ')', '[', ']', '{', '}', '"', "'",
    )
    # Marks are reported in the order of the tuple above, once each.
    found_punctuation = []
    for mark in candidate_marks:
        if mark in issue_value:
            found_punctuation.append(mark)

    return build_response(
        title="issue value without punctuation",
        parent={"parent": "article"},
        item="issue",
        sub_item=None,
        validation_type="format",
        is_valid=not found_punctuation,
        expected="issue value without punctuation marks",
        obtained=issue_value,
        advice=f"Remove punctuation marks {found_punctuation} from <issue> value '{issue_value}'",
        data={"issue": issue_value, "punctuation_found": found_punctuation},
        error_level=self.params.get("issue_no_punctuation_error_level", "ERROR"),
    )

def validate_issue_no_uppercase(self):
    """
    Check that the <issue> value has no uppercase characters.

    SPS 1.10 requires every letter in <issue> to be lowercase.

    Returns:
        None when the issue value is empty or missing; otherwise the
        value produced by ``build_response``, whose ``data`` argument
        includes the lowercased form of the value as the expected one.
    """
    issue_value = self.article_issue.issue
    if not issue_value:
        return None

    lowered = issue_value.lower()
    # Valid only if no single character reports itself as uppercase.
    is_valid = not any(map(str.isupper, issue_value))

    return build_response(
        title="issue value without uppercase",
        parent={"parent": "article"},
        item="issue",
        sub_item=None,
        validation_type="format",
        is_valid=is_valid,
        expected="issue value in lowercase only",
        obtained=issue_value,
        advice=f"Convert uppercase letters to lowercase in <issue> value '{issue_value}'. Expected: '{lowered}'",
        data={"issue": issue_value, "expected": lowered},
        error_level=self.params.get("issue_no_uppercase_error_level", "ERROR"),
    )

def validate_issue_supplement_nomenclature(self):
    """
    Check that a supplement is written as 'suppl' in the <issue> value.

    SPS 1.10 requires 'suppl'; the variants 'supl', 'supplement' and
    'sup' are reported as invalid. Matching is case-insensitive.

    Returns:
        None when the issue value is empty or missing, or when it does
        not contain 'sup' at all; otherwise the value produced by
        ``build_response``, whose ``data`` argument lists the invalid
        terms that were found.
    """
    issue_value = self.article_issue.issue
    if not issue_value:
        return None

    issue_lower = issue_value.lower()

    # Nothing supplement-related in the value: this check does not apply.
    if "sup" not in issue_lower:
        return None

    # A term counts only when it is not glued to other letters. Plain
    # ``\b`` is not used because it treats digits as word characters, so
    # values such as 'supl1' or 'sup2' would slip through unreported.
    invalid_patterns = [
        term
        for term in ("supl", "supplement", "sup")
        if re.search(rf"(?<![a-z]){term}(?![a-z])", issue_lower)
    ]

    return build_response(
        title="issue supplement nomenclature",
        parent={"parent": "article"},
        item="issue",
        sub_item="supplement nomenclature",
        validation_type="format",
        is_valid=not invalid_patterns,
        expected="supplement nomenclature as 'suppl'",
        obtained=issue_value,
        advice=f"Use 'suppl' for supplement nomenclature in <issue> value '{issue_value}'. Invalid terms found: {invalid_patterns}",
        data={"issue": issue_value, "invalid_terms": invalid_patterns},
        error_level=self.params.get("issue_supplement_nomenclature_error_level", "ERROR"),
    )

def validate_issue_special_nomenclature(self):
    """
    Check that a special issue is written as 'spe' in the <issue> value.

    SPS 1.10 requires 'spe'; the variants 'esp', 'especial', 'nesp',
    'nspe' and 'noesp' are reported as invalid. Matching is
    case-insensitive.

    Returns:
        None when the issue value is empty or missing, or when it holds
        neither an invalid variant nor 'spe'; otherwise the value
        produced by ``build_response``, whose ``data`` argument lists the
        invalid terms that were found.
    """
    issue_value = self.article_issue.issue
    if not issue_value:
        return None

    issue_lower = issue_value.lower()

    # Each variant must stand as its own run of letters. A plain substring
    # test would report 'nesp1' twice ('esp' and 'nesp') and would flag
    # unrelated values that merely contain 'esp', such as 'resp1'.
    found_invalid = [
        term
        for term in ("esp", "especial", "nesp", "nspe", "noesp")
        if re.search(rf"(?<![a-z]){term}(?![a-z])", issue_lower)
    ]

    # No invalid variant and no 'spe' either: not a special issue, so
    # this check does not apply.
    if not found_invalid and "spe" not in issue_lower:
        return None

    return build_response(
        title="issue special nomenclature",
        parent={"parent": "article"},
        item="issue",
        sub_item="special issue nomenclature",
        validation_type="format",
        is_valid=not found_invalid,
        expected="special issue nomenclature as 'spe'",
        obtained=issue_value,
        advice=f"Use 'spe' for special issue nomenclature in <issue> value '{issue_value}'. Invalid terms found: {found_invalid}",
        data={"issue": issue_value, "invalid_terms": found_invalid},
        error_level=self.params.get("issue_special_nomenclature_error_level", "ERROR"),
    )

def validate_no_supplement_element(self):
    """
    Check that <article-meta> contains no <supplement> element.

    SPS 1.10 does not allow <supplement> inside <article-meta>;
    supplements are to be expressed through <issue> instead. Every
    ``front/article-meta/supplement`` node found in ``self.xml_tree``
    counts as a violation.

    Returns:
        The value produced by ``build_response`` for this check. Its
        ``data`` argument carries the number of <supplement> nodes and
        the text of each of them.
    """
    found = self.xml_tree.findall(".//front/article-meta/supplement")
    total = len(found)

    texts = []
    for node in found:
        texts.append(node.text)

    return build_response(
        title="supplement element not allowed",
        parent={"parent": "article"},
        item="supplement",
        sub_item=None,
        validation_type="unexpected",
        is_valid=not total,
        expected="no <supplement> element in <article-meta>",
        obtained=f"{total} <supplement> element(s) found",
        advice="Remove <supplement> element(s) from <article-meta>. Use <issue> element to indicate supplements (e.g., '4 suppl 1').",
        data={"supplement_count": total, "supplement_values": texts},
        error_level=self.params.get("no_supplement_element_error_level", "CRITICAL"),
    )

def validate_issue_no_leading_zeros(self):
    """
    Check that no number inside the <issue> value has leading zeros.

    SPS 1.10 expects '4', not '04'. Digit runs are inspected wherever
    they occur, including when attached to a prefix, so 'spe01' is
    reported just like '04' (expected 'spe1' and '4' respectively).
    A lone '0' is not a leading zero and is accepted.

    Returns:
        None when the issue value is empty or missing; otherwise the
        value produced by ``build_response``, whose ``data`` argument
        lists the whitespace-separated parts that contain leading zeros
        and the normalized expected value.
    """
    issue_value = self.article_issue.issue
    if not issue_value:
        return None

    # One or more zeros that open a digit run (not preceded by a digit)
    # and are followed by at least one more digit. The lookahead keeps the
    # last digit of an all-zero run, so '00' normalizes to '0'.
    leading_zeros = re.compile(r"(?<!\d)0+(?=\d)")

    parts = issue_value.split()
    issues_found = [part for part in parts if leading_zeros.search(part)]
    expected_value = " ".join(leading_zeros.sub("", part) for part in parts)

    return build_response(
        title="issue value without leading zeros",
        parent={"parent": "article"},
        item="issue",
        sub_item=None,
        validation_type="format",
        is_valid=not issues_found,
        expected="numeric values without leading zeros",
        obtained=issue_value,
        advice=f"Remove leading zeros from numeric parts in <issue> value '{issue_value}'. Expected: '{expected_value}'",
        data={"issue": issue_value, "parts_with_leading_zeros": issues_found, "expected": expected_value},
        error_level=self.params.get("issue_no_leading_zeros_error_level", "WARNING"),
    )

def validate(self):
"""
Performs all validation checks for the issue.
Expand All @@ -255,7 +494,15 @@ def validate(self):
yield self.validate_number_format()
yield self.validate_supplement_format()
yield self.validate_issue_format()
yield self.validate_expected_issues()
yield self.validate_expected_issues()
# New SPS 1.10 validations for <issue> element
yield self.validate_issue_element_uniqueness()
yield self.validate_issue_no_punctuation()
yield self.validate_issue_no_uppercase()
yield self.validate_issue_supplement_nomenclature()
yield self.validate_issue_special_nomenclature()
yield self.validate_no_supplement_element()
yield self.validate_issue_no_leading_zeros()


class PaginationValidation:
Expand Down
Loading
Loading