Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions packtools/sps/models/sec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from packtools.sps.models.article_and_subarticles import Fulltext


VALID_SEC_TYPES = [
"cases",
"conclusions",
"data-availability",
"discussion",
"intro",
"materials",
"methods",
"results",
"subjects",
"supplementary-material",
"transcript",
]

NON_COMBINABLE_SEC_TYPES = [
"data-availability",
"supplementary-material",
"transcript",
]


Comment on lines +4 to +24
Copy link

Copilot AI Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

VALID_SEC_TYPES and NON_COMBINABLE_SEC_TYPES are defined but not referenced anywhere in the PR (validation relies on params from sec_rules.json). If these constants are not meant to be part of the public API, consider removing them to avoid dead code, or alternatively use them as defaults in SecValidation/sec_rules.json to keep the source of truth in one place.

Suggested change
VALID_SEC_TYPES = [
"cases",
"conclusions",
"data-availability",
"discussion",
"intro",
"materials",
"methods",
"results",
"subjects",
"supplementary-material",
"transcript",
]
NON_COMBINABLE_SEC_TYPES = [
"data-availability",
"supplementary-material",
"transcript",
]

Copilot uses AI. Check for mistakes.
class Sec:
"""Represents a single <sec> element."""

def __init__(self, element, parent_attribs=None):
self.element = element
self._parent_attribs = parent_attribs or {}

@property
def sec_id(self):
return self.element.get("id")

@property
def sec_type(self):
return self.element.get("sec-type")

@property
def specific_use(self):
return self.element.get("specific-use")

@property
def title(self):
title_elem = self.element.find("title")
if title_elem is not None:
return title_elem.text or ""
return None

@property
def paragraphs(self):
return self.element.findall("p")

@property
def is_first_level(self):
parent = self.element.getparent()
if parent is not None:
return parent.tag in ("body", "back", "abstract", "trans-abstract",
"app", "bio", "boxed-text")
return True

@property
def data(self):
d = {
"sec_id": self.sec_id,
"sec_type": self.sec_type,
"specific_use": self.specific_use,
"title": self.title,
"has_title": self.title is not None,
"paragraph_count": len(self.paragraphs),
"is_first_level": self.is_first_level,
}
d.update(self._parent_attribs)
return d


class ArticleSecs:
"""Extracts all <sec> elements from an article and sub-articles."""

def __init__(self, xmltree):
self.xmltree = xmltree

@property
def main_article_type(self):
return self.xmltree.find(".").get("article-type")

def _get_secs_from_node(self, node, parent_attribs):
for sec_elem in node.xpath(".//sec"):
yield Sec(sec_elem, parent_attribs).data
Comment on lines +88 to +90
Copy link

Copilot AI Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ArticleSecs.all_secs() uses node.xpath('.//sec') for the main article node, which will also traverse into <sub-article> descendants. Since the loop also iterates over translation sub-articles separately, the same <sec> elements can be yielded twice (duplicating validation results). Consider changing the XPath for the main article to exclude ancestor::sub-article (or restrict the search to ./front|./body|./back|./front-stub like other models) so each <sec> is reported only once per language/article context.

Copilot uses AI. Check for mistakes.

@property
def all_secs(self):
for node in self.xmltree.xpath(
". | ./sub-article[@article-type='translation']"
):
fulltext = Fulltext(node)
parent_attribs = fulltext.attribs_parent_prefixed
yield from self._get_secs_from_node(fulltext.node, parent_attribs)

@property
def first_level_body_secs(self):
"""Get only first-level <sec> elements inside <body>."""
for node in self.xmltree.xpath(
". | ./sub-article[@article-type='translation']"
):
fulltext = Fulltext(node)
parent_attribs = fulltext.attribs_parent_prefixed
body = fulltext.body
if body is not None:
for sec_elem in body.findall("sec"):
yield Sec(sec_elem, parent_attribs).data

@property
def body_sec_types(self):
"""Get all sec-type values from first-level body secs."""
return [
sec["sec_type"]
for sec in self.first_level_body_secs
if sec.get("sec_type")
]
238 changes: 238 additions & 0 deletions packtools/sps/validation/sec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
from packtools.sps.models.sec import ArticleSecs
from packtools.sps.validation.utils import build_response


class SecValidation:
"""Validates a single <sec> element."""

def __init__(self, data, params):
self.data = data
self.params = params

def validate(self):
yield self.validate_title()
result = self.validate_sec_type_value()
if result:
yield result
result = self.validate_transcript_id()
if result:
yield result
result = self.validate_combined_format()
if result:
yield result
result = self.validate_non_combinable()
if result:
yield result
yield self.validate_content()

def validate_title(self):
"""Rule 1: <title> is mandatory in <sec> for accessibility."""
has_title = self.data.get("has_title")
return build_response(
title="sec title",
parent=self.data,
item="sec",
sub_item="title",
validation_type="exist",
is_valid=has_title,
expected="<title> element in <sec>",
obtained=self.data.get("title"),
advice="Add <title> element to <sec> for accessibility",
data=self.data,
error_level=self.params.get("title_error_level", "CRITICAL"),
)

def validate_sec_type_value(self):
"""Rule 2: When present, @sec-type must have a valid value."""
sec_type = self.data.get("sec_type")
if not sec_type:
return None

valid_sec_types = self.params.get("valid_sec_types", [])
# Handle combined types (e.g. "materials|methods")
parts = sec_type.split("|")
is_valid = all(part in valid_sec_types for part in parts)

Comment on lines +45 to +55
Copy link

Copilot AI Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

validate_sec_type_value() treats sec-type="materials methods" (or comma-separated) as a single invalid token and returns an ERROR, and validate_combined_format() will also emit a WARNING. This conflicts with the PR description where wrong separators are intended to be a WARNING when the underlying types are valid. A concrete fix is to normalize separators (split on |, space, and ,), strip tokens, validate token values against valid_sec_types, and let only the combined-format rule report the separator problem.

Copilot uses AI. Check for mistakes.
return build_response(
title="sec type value",
parent=self.data,
item="sec",
sub_item="@sec-type",
validation_type="value in list",
is_valid=is_valid,
expected=str(valid_sec_types),
obtained=sec_type,
advice=f'Replace @sec-type="{sec_type}" with a valid value: {valid_sec_types}',
data=self.data,
error_level=self.params.get("sec_type_value_error_level", "ERROR"),
)

def validate_transcript_id(self):
"""Rule 3: <sec sec-type="transcript"> must have @id."""
sec_type = self.data.get("sec_type")
if sec_type != "transcript":
return None

sec_id = self.data.get("sec_id")
is_valid = bool(sec_id)

return build_response(
title="transcript id",
parent=self.data,
item="sec",
sub_item="@id",
validation_type="exist",
is_valid=is_valid,
expected='@id attribute in <sec sec-type="transcript">',
obtained=sec_id,
advice='Add @id attribute to <sec sec-type="transcript">',
data=self.data,
error_level=self.params.get("transcript_id_error_level", "ERROR"),
)

def validate_combined_format(self):
"""Rule 5: Combined sec-types must use pipe separator."""
sec_type = self.data.get("sec_type")
if not sec_type:
return None

# Only check if it looks like a combined type (contains separator chars)
# If it contains spaces or commas but not pipes, it's incorrectly formatted
has_space_separator = " " in sec_type and "|" not in sec_type
has_comma_separator = "," in sec_type and "|" not in sec_type

if not has_space_separator and not has_comma_separator:
return None

return build_response(
title="sec type combined format",
parent=self.data,
item="sec",
sub_item="@sec-type",
validation_type="format",
is_valid=False,
expected='Combined sec-types separated by pipe "|" (e.g., "materials|methods")',
obtained=sec_type,
advice=f'Use pipe "|" as separator in @sec-type="{sec_type}" (e.g., "materials|methods")',
data=self.data,
error_level=self.params.get("combined_format_error_level", "WARNING"),
)

def validate_non_combinable(self):
"""Rule 6: transcript, supplementary-material, and data-availability cannot be combined."""
sec_type = self.data.get("sec_type")
if not sec_type or "|" not in sec_type:
return None

non_combinable = self.params.get(
"non_combinable_sec_types",
["data-availability", "supplementary-material", "transcript"],
)
parts = sec_type.split("|")

found_non_combinable = [p for p in parts if p in non_combinable]
if not found_non_combinable:
return None

return build_response(
title="sec type non-combinable",
parent=self.data,
item="sec",
sub_item="@sec-type",
validation_type="format",
is_valid=False,
expected=f"Types {non_combinable} must not be combined with other types",
obtained=sec_type,
advice=f'Do not combine "{found_non_combinable[0]}" with other types in @sec-type="{sec_type}"',
data=self.data,
error_level=self.params.get("non_combinable_error_level", "WARNING"),
)

def validate_content(self):
"""Rule 7: <sec> should contain at least one <p> after <title>."""
paragraph_count = self.data.get("paragraph_count", 0)
is_valid = paragraph_count > 0

return build_response(
title="sec content",
parent=self.data,
item="sec",
sub_item="p",
validation_type="exist",
is_valid=is_valid,
expected="At least one <p> element in <sec>",
obtained=f"{paragraph_count} paragraphs",
advice="Add at least one <p> element to <sec>",
data=self.data,
error_level=self.params.get("content_error_level", "WARNING"),
)


class XMLSecValidation:
"""Validates all <sec> elements in the XML document."""

def __init__(self, xmltree, params):
self.xmltree = xmltree
self.params = params
self.article_secs = ArticleSecs(xmltree)

def validate(self):
yield from self.validate_secs()
yield from self.validate_data_availability_presence()

def validate_secs(self):
"""Validate each <sec> element individually."""
for sec_data in self.article_secs.all_secs:
validator = SecValidation(sec_data, self.params)
yield from validator.validate()

def validate_data_availability_presence(self):
"""Rule 4: Certain article types require a data-availability section."""
required_types = self.params.get(
"data_availability_required_article_types",
[
"data-article",
"brief-report",
"case-report",
"rapid-communication",
"research-article",
"review-article",
],
)

article_type = self.article_secs.main_article_type
if not article_type or article_type not in required_types:
return

body_sec_types = self.article_secs.body_sec_types
has_data_availability = "data-availability" in body_sec_types
Comment on lines +189 to +208
Copy link

Copilot AI Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

validate_data_availability_presence() currently checks only first-level <body>/<sec> types (ArticleSecs.body_sec_types). Elsewhere in the codebase, data availability is considered valid when provided as either <sec sec-type="data-availability"> or <fn fn-type="data-availability"> in <body> or <back> (see DataAvailability/DataAvailabilityValidation). As written, this can incorrectly emit an ERROR for valid documents (e.g., statement in <back> or as <fn>). Consider either reusing DataAvailabilityValidation.validate_data_availability_exist() for this rule, or expanding the search to match the same locations/types to avoid contradictory results.

Copilot uses AI. Check for mistakes.

parent = {
"parent": "article",
"parent_id": None,
"parent_article_type": article_type,
"parent_lang": self.xmltree.find(".").get(
"{http://www.w3.org/XML/1998/namespace}lang"
),
}

yield build_response(
title="data availability section",
parent=parent,
item="sec",
sub_item='@sec-type="data-availability"',
validation_type="exist",
is_valid=has_data_availability,
expected='<sec sec-type="data-availability"> in <body>',
obtained=(
'<sec sec-type="data-availability">'
if has_data_availability
else "missing"
),
advice=(
f'Add <sec sec-type="data-availability" specific-use="..."> to <body> '
f'(required for article-type="{article_type}")'
),
data=parent,
error_level=self.params.get("data_availability_error_level", "ERROR"),
)
19 changes: 19 additions & 0 deletions packtools/sps/validation/xml_validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from packtools.sps.validation.history import HistoryValidation
from packtools.sps.validation.ext_link import ExtLinkValidation
from packtools.sps.validation.graphic import XMLGraphicValidation
from packtools.sps.validation.sec import XMLSecValidation


def validate_affiliations(xmltree, params):
Expand Down Expand Up @@ -374,3 +375,21 @@ def validate_graphics(xmltree, params):
graphic_rules = params["graphic_rules"]
validator = XMLGraphicValidation(xmltree, graphic_rules)
yield from validator.validate()


def validate_secs(xmltree, params):
"""
Validates <sec> elements according to SPS 1.10 specification.

Validates:
- <title> presence (accessibility requirement)
- @sec-type valid values
- @id for transcript sections
- data-availability section presence for required article types
- Combined sec-type format
- Non-combinable sec-types
- Content presence
"""
sec_rules = params["sec_rules"]
validator = XMLSecValidation(xmltree, sec_rules)
yield from validator.validate()
4 changes: 4 additions & 0 deletions packtools/sps/validation/xml_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,7 @@ def validate_xml_content(xmltree, rules):
"group": "graphic",
"items": xml_validations.validate_graphics(xmltree, params),
}
yield {
"group": "sec",
"items": xml_validations.validate_secs(xmltree, params),
}
Loading
Loading