class Sec:
    """Represents a single <sec> element.

    Wraps a ``<sec>`` node and exposes the attributes and child data used by
    the section validators.

    Args:
        element: The ``<sec>`` node. It must provide ``get``, ``find`` and
            ``findall``; ``is_first_level`` (and therefore ``data``) also
            calls ``getparent()`` on it.
        parent_attribs: Optional dict of extra key/value pairs merged into
            ``data`` (they override same-named keys computed here).
    """

    # Tags whose direct <sec> children are considered first-level sections.
    _FIRST_LEVEL_PARENT_TAGS = (
        "body", "back", "abstract", "trans-abstract",
        "app", "bio", "boxed-text",
    )

    def __init__(self, element, parent_attribs=None):
        self.element = element
        self._parent_attribs = parent_attribs or {}

    @property
    def sec_id(self):
        """Value of the ``id`` attribute, or ``None`` when absent."""
        return self.element.get("id")

    @property
    def sec_type(self):
        """Value of the ``sec-type`` attribute, or ``None`` when absent."""
        return self.element.get("sec-type")

    @property
    def specific_use(self):
        """Value of the ``specific-use`` attribute, or ``None`` when absent."""
        return self.element.get("specific-use")

    @property
    def title(self):
        """Full text of the direct ``<title>`` child.

        Returns:
            ``None`` when there is no ``<title>`` child; otherwise the
            concatenated text of the title, including text held by inline
            children such as ``<italic>`` (``""`` for an empty title).
        """
        title_elem = self.element.find("title")
        if title_elem is None:
            return None
        # ``.text`` alone stops at the first child element, which drops
        # everything inside and after inline markup.
        return "".join(title_elem.itertext())

    @property
    def paragraphs(self):
        """List of the direct ``<p>`` children (nested sections excluded)."""
        return self.element.findall("p")

    @property
    def is_first_level(self):
        """Whether the section sits directly under a container element.

        A section with no parent at all is treated as first-level.
        """
        parent = self.element.getparent()
        if parent is None:
            return True
        return parent.tag in self._FIRST_LEVEL_PARENT_TAGS

    @property
    def data(self):
        """Dict summary of the section, extended with the parent attributes."""
        title = self.title  # computed once; used for two keys below
        d = {
            "sec_id": self.sec_id,
            "sec_type": self.sec_type,
            "specific_use": self.specific_use,
            "title": title,
            "has_title": title is not None,
            "paragraph_count": len(self.paragraphs),
            "is_first_level": self.is_first_level,
        }
        d.update(self._parent_attribs)
        return d
class ArticleSecs:
    """Extracts all <sec> elements from an article and its translations.

    The main article and each ``<sub-article article-type="translation">``
    that is a direct child of it are visited separately, so every section is
    reported with the attributes of the text it belongs to.

    Args:
        xmltree: Parsed XML (must support ``find`` and ``xpath``).
    """

    # The root node plus its direct translation sub-articles.
    _FULLTEXT_NODES_XPATH = ". | ./sub-article[@article-type='translation']"

    def __init__(self, xmltree):
        self.xmltree = xmltree

    @property
    def main_article_type(self):
        """Value of ``@article-type`` on the root element (may be ``None``)."""
        return self.xmltree.find(".").get("article-type")

    @staticmethod
    def _belongs_to_other_node(sec_elem, node, other_nodes):
        """Tell whether ``sec_elem`` is nested in one of ``other_nodes``.

        Walks the ancestors of ``sec_elem`` from the nearest one upwards and
        stops at ``node``; reaching another visited node first means the
        section is owned by that node, not by ``node``.
        """
        for ancestor in sec_elem.iterancestors():
            if ancestor is node:
                return False
            if any(ancestor is other for other in other_nodes):
                return True
        return False

    def _get_secs_from_node(self, node, parent_attribs, other_nodes=()):
        """Yield the ``Sec.data`` dict of every ``<sec>`` below ``node``.

        Args:
            node: Element whose descendants are searched.
            parent_attribs: Dict merged into each yielded item.
            other_nodes: Elements that are visited on their own; sections
                nested inside any of them are skipped here. The default
                (empty) keeps every descendant ``<sec>``.
        """
        for sec_elem in node.xpath(".//sec"):
            if other_nodes and self._belongs_to_other_node(
                sec_elem, node, other_nodes
            ):
                continue
            yield Sec(sec_elem, parent_attribs).data

    @property
    def all_secs(self):
        """Yield data for every ``<sec>``, each one exactly once.

        ``.//sec`` evaluated on the root article also matches the sections
        of the translation sub-articles, which are visited again afterwards;
        without the ownership filter those sections would be yielded twice,
        the first time carrying the main article's parent attributes.
        """
        nodes = self.xmltree.xpath(self._FULLTEXT_NODES_XPATH)
        for node in nodes:
            fulltext = Fulltext(node)
            # NOTE(review): assumes ``fulltext.node`` is the same element
            # object that was passed to ``Fulltext`` - confirm.
            current = fulltext.node
            others = [other for other in nodes if other is not current]
            yield from self._get_secs_from_node(
                current, fulltext.attribs_parent_prefixed, others
            )

    @property
    def first_level_body_secs(self):
        """Get only first-level <sec> elements inside <body>."""
        for node in self.xmltree.xpath(self._FULLTEXT_NODES_XPATH):
            fulltext = Fulltext(node)
            parent_attribs = fulltext.attribs_parent_prefixed
            body = fulltext.body
            if body is None:
                continue
            for sec_elem in body.findall("sec"):
                yield Sec(sec_elem, parent_attribs).data

    @property
    def body_sec_types(self):
        """Get all non-empty sec-type values from first-level body secs."""
        return [
            sec["sec_type"]
            for sec in self.first_level_body_secs
            if sec.get("sec_type")
        ]
class SecValidation:
    """Validates a single <sec> element.

    Args:
        data: Dict describing one section (keys read here: ``sec_id``,
            ``sec_type``, ``title``, ``has_title``, ``paragraph_count``).
        params: Dict of rule settings: ``*_error_level`` entries plus the
            optional ``valid_sec_types`` and ``non_combinable_sec_types``
            lists. Missing lists fall back to the class defaults.
    """

    # Fallbacks used when the rules do not provide the corresponding list.
    DEFAULT_VALID_SEC_TYPES = (
        "cases",
        "conclusions",
        "data-availability",
        "discussion",
        "intro",
        "materials",
        "methods",
        "results",
        "subjects",
        "supplementary-material",
        "transcript",
    )
    DEFAULT_NON_COMBINABLE_SEC_TYPES = (
        "data-availability",
        "supplementary-material",
        "transcript",
    )

    def __init__(self, data, params):
        self.data = data
        self.params = params

    def _sec_type_parts(self):
        """Return the pipe-separated parts of @sec-type ([] when absent)."""
        sec_type = self.data.get("sec_type")
        return sec_type.split("|") if sec_type else []

    def _valid_sec_types(self):
        """Return the configured valid sec-types, or the built-in list."""
        return self.params.get(
            "valid_sec_types", list(self.DEFAULT_VALID_SEC_TYPES)
        )

    def _invalid_sec_type_parts(self):
        """Return the parts of @sec-type that are not valid values."""
        valid_sec_types = self._valid_sec_types()
        return [
            part for part in self._sec_type_parts()
            if part not in valid_sec_types
        ]

    def validate(self):
        """Yield the result of every rule that applies to this section.

        The title and content rules always produce a result; the other
        rules return ``None`` when they do not apply and are then omitted.
        """
        yield self.validate_title()
        optional_checks = (
            self.validate_sec_type_value,
            self.validate_transcript_id,
            self.validate_combined_format,
            self.validate_non_combinable,
        )
        for check in optional_checks:
            result = check()
            if result:
                yield result
        yield self.validate_content()

    def validate_title(self):
        """Rule 1: <title> is mandatory in <sec> for accessibility."""
        # bool() so a missing "has_title" key counts as invalid, not None.
        has_title = bool(self.data.get("has_title"))
        return build_response(
            title="sec title",
            parent=self.data,
            item="sec",
            sub_item="title",
            validation_type="exist",
            is_valid=has_title,
            expected="<title> element in <sec>",
            obtained=self.data.get("title"),
            advice="Add <title> element to <sec> for accessibility",
            data=self.data,
            error_level=self.params.get("title_error_level", "CRITICAL"),
        )

    def validate_sec_type_value(self):
        """Rule 2: When present, @sec-type must have a valid value.

        Combined types (e.g. "materials|methods") are valid only when every
        part is valid. Returns ``None`` when @sec-type is absent.
        """
        sec_type = self.data.get("sec_type")
        if not sec_type:
            return None

        valid_sec_types = self._valid_sec_types()
        is_valid = not self._invalid_sec_type_parts()

        return build_response(
            title="sec type value",
            parent=self.data,
            item="sec",
            sub_item="@sec-type",
            validation_type="value in list",
            is_valid=is_valid,
            expected=str(valid_sec_types),
            obtained=sec_type,
            advice=f'Replace @sec-type="{sec_type}" with a valid value: {valid_sec_types}',
            data=self.data,
            error_level=self.params.get("sec_type_value_error_level", "ERROR"),
        )

    def validate_transcript_id(self):
        """Rule 3: <sec sec-type="transcript"> must have @id.

        Returns ``None`` for any other @sec-type.
        """
        if self.data.get("sec_type") != "transcript":
            return None

        sec_id = self.data.get("sec_id")

        return build_response(
            title="transcript id",
            parent=self.data,
            item="sec",
            sub_item="@id",
            validation_type="exist",
            is_valid=bool(sec_id),
            expected='@id attribute in <sec sec-type="transcript">',
            obtained=sec_id,
            advice='Add @id attribute to <sec sec-type="transcript">',
            data=self.data,
            error_level=self.params.get("transcript_id_error_level", "ERROR"),
        )

    def validate_combined_format(self):
        """Rule 5: Combined sec-types must use pipe separator.

        Only reports a problem: a value containing a space or a comma and
        no pipe is taken as a wrongly separated combination. Returns
        ``None`` in every other case.
        """
        sec_type = self.data.get("sec_type")
        if not sec_type or "|" in sec_type:
            return None
        if " " not in sec_type and "," not in sec_type:
            return None

        return build_response(
            title="sec type combined format",
            parent=self.data,
            item="sec",
            sub_item="@sec-type",
            validation_type="format",
            is_valid=False,
            expected='Combined sec-types separated by pipe "|" (e.g., "materials|methods")',
            obtained=sec_type,
            advice=f'Use pipe "|" as separator in @sec-type="{sec_type}" (e.g., "materials|methods")',
            data=self.data,
            error_level=self.params.get("combined_format_error_level", "WARNING"),
        )

    def validate_non_combinable(self):
        """Rule 6: transcript, supplementary-material, and data-availability cannot be combined.

        Returns ``None`` when @sec-type is not a combination or contains no
        non-combinable part.
        """
        sec_type = self.data.get("sec_type")
        if not sec_type or "|" not in sec_type:
            return None

        non_combinable = self.params.get(
            "non_combinable_sec_types",
            list(self.DEFAULT_NON_COMBINABLE_SEC_TYPES),
        )
        found_non_combinable = [
            part for part in sec_type.split("|") if part in non_combinable
        ]
        if not found_non_combinable:
            return None

        return build_response(
            title="sec type non-combinable",
            parent=self.data,
            item="sec",
            sub_item="@sec-type",
            validation_type="format",
            is_valid=False,
            expected=f"Types {non_combinable} must not be combined with other types",
            obtained=sec_type,
            advice=f'Do not combine "{found_non_combinable[0]}" with other types in @sec-type="{sec_type}"',
            data=self.data,
            error_level=self.params.get("non_combinable_error_level", "WARNING"),
        )

    def validate_content(self):
        """Rule 7: <sec> should contain at least one <p> after <title>."""
        # "or 0" also covers an explicit None, which cannot be compared.
        paragraph_count = self.data.get("paragraph_count") or 0

        return build_response(
            title="sec content",
            parent=self.data,
            item="sec",
            sub_item="p",
            validation_type="exist",
            is_valid=paragraph_count > 0,
            expected="At least one <p> element in <sec>",
            obtained=f"{paragraph_count} paragraphs",
            advice="Add at least one <p> element to <sec>",
            data=self.data,
            error_level=self.params.get("content_error_level", "WARNING"),
        )
class XMLSecValidation:
    """Runs the <sec> validations over a whole XML document.

    Args:
        xmltree: Parsed XML document.
        params: Dict of rule settings shared by all checks.
    """

    def __init__(self, xmltree, params):
        self.xmltree = xmltree
        self.params = params
        self.article_secs = ArticleSecs(xmltree)

    def validate(self):
        """Yield per-section results first, then the document-level result."""
        checks = (
            self.validate_secs,
            self.validate_data_availability_presence,
        )
        for check in checks:
            yield from check()

    def validate_secs(self):
        """Yield the results of validating every <sec> element on its own."""
        for item in self.article_secs.all_secs:
            yield from SecValidation(item, self.params).validate()

    def validate_data_availability_presence(self):
        """Rule 4: check that a data-availability section exists when required.

        Yields nothing when the main article type is missing or is not one
        of the types that require the section; otherwise yields one result.
        """
        default_required_types = [
            "data-article",
            "brief-report",
            "case-report",
            "rapid-communication",
            "research-article",
            "review-article",
        ]
        required_types = self.params.get(
            "data_availability_required_article_types",
            default_required_types,
        )

        article_type = self.article_secs.main_article_type
        if not (article_type and article_type in required_types):
            return

        found = "data-availability" in self.article_secs.body_sec_types

        root = self.xmltree.find(".")
        parent = {
            "parent": "article",
            "parent_id": None,
            "parent_article_type": article_type,
            "parent_lang": root.get(
                "{http://www.w3.org/XML/1998/namespace}lang"
            ),
        }

        if found:
            obtained = '<sec sec-type="data-availability">'
        else:
            obtained = "missing"

        advice = (
            f'Add <sec sec-type="data-availability" specific-use="..."> to <body> '
            f'(required for article-type="{article_type}")'
        )

        yield build_response(
            title="data availability section",
            parent=parent,
            item="sec",
            sub_item='@sec-type="data-availability"',
            validation_type="exist",
            is_valid=found,
            expected='<sec sec-type="data-availability"> in <body>',
            obtained=obtained,
            advice=advice,
            data=parent,
            error_level=self.params.get("data_availability_error_level", "ERROR"),
        )
def validate_secs(xmltree, params):
    """
    Yield the <sec> validation results (SPS 1.10) for a document.

    Reads the rule settings from ``params["sec_rules"]`` and delegates to
    ``XMLSecValidation``, which covers:
    - <title> presence (accessibility requirement)
    - @sec-type valid values
    - @id for transcript sections
    - data-availability section presence for required article types
    - combined sec-type format
    - non-combinable sec-types
    - content presence
    """
    rules = params["sec_rules"]
    results = XMLSecValidation(xmltree, rules).validate()
    yield from results
000000000..835ebb4e3 --- /dev/null +++ b/packtools/sps/validation_rules/sec_rules.json @@ -0,0 +1,37 @@ +{ + "sec_rules": { + "title_error_level": "CRITICAL", + "sec_type_value_error_level": "ERROR", + "transcript_id_error_level": "ERROR", + "data_availability_error_level": "ERROR", + "combined_format_error_level": "WARNING", + "non_combinable_error_level": "WARNING", + "content_error_level": "WARNING", + "valid_sec_types": [ + "cases", + "conclusions", + "data-availability", + "discussion", + "intro", + "materials", + "methods", + "results", + "subjects", + "supplementary-material", + "transcript" + ], + "non_combinable_sec_types": [ + "data-availability", + "supplementary-material", + "transcript" + ], + "data_availability_required_article_types": [ + "data-article", + "brief-report", + "case-report", + "rapid-communication", + "research-article", + "review-article" + ] + } +} diff --git a/tests/sps/validation/test_sec.py b/tests/sps/validation/test_sec.py new file mode 100644 index 000000000..c9f9eb2c3 --- /dev/null +++ b/tests/sps/validation/test_sec.py @@ -0,0 +1,789 @@ +""" +Tests for SecValidation and XMLSecValidation classes according to SPS 1.10 specification. 
+ +Tests validation of <sec> elements including: +- <title> presence (accessibility requirement) +- @sec-type valid values +- @id requirement for transcript sections +- data-availability section presence for indexable article types +- Combined sec-type format (pipe separator) +- Non-combinable sec-types +- Content presence (<p> elements) +""" + +import unittest +from lxml import etree + +from packtools.sps.models.sec import ArticleSecs +from packtools.sps.validation.sec import SecValidation, XMLSecValidation + + +class TestSecValidationTitle(unittest.TestCase): + """Test Rule 1: <title> is mandatory in <sec>.""" + + def setUp(self): + self.params = { + "title_error_level": "CRITICAL", + "sec_type_value_error_level": "ERROR", + "transcript_id_error_level": "ERROR", + "data_availability_error_level": "ERROR", + "combined_format_error_level": "WARNING", + "non_combinable_error_level": "WARNING", + "content_error_level": "WARNING", + "valid_sec_types": [ + "cases", "conclusions", "data-availability", "discussion", + "intro", "materials", "methods", "results", "subjects", + "supplementary-material", "transcript", + ], + "non_combinable_sec_types": [ + "data-availability", "supplementary-material", "transcript", + ], + "data_availability_required_article_types": [ + "data-article", "brief-report", "case-report", + "rapid-communication", "research-article", "review-article", + ], + } + + def test_sec_with_title_passes(self): + xml = """ + <article> + <body> + <sec sec-type="methods"> + <title>Methods +

Content here.

+
+ + + """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_title() + self.assertEqual(result["response"], "OK") + + def test_sec_without_title_fails(self): + xml = """ +
+ + +

Content without title.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_title() + self.assertEqual(result["response"], "CRITICAL") + self.assertIn("Add ", result["advice"]) + + +class TestSecValidationSecTypeValue(unittest.TestCase): + """Test Rule 2: @sec-type must have a valid value when present.""" + + def setUp(self): + self.params = { + "title_error_level": "CRITICAL", + "sec_type_value_error_level": "ERROR", + "transcript_id_error_level": "ERROR", + "data_availability_error_level": "ERROR", + "combined_format_error_level": "WARNING", + "non_combinable_error_level": "WARNING", + "content_error_level": "WARNING", + "valid_sec_types": [ + "cases", "conclusions", "data-availability", "discussion", + "intro", "materials", "methods", "results", "subjects", + "supplementary-material", "transcript", + ], + "non_combinable_sec_types": [ + "data-availability", "supplementary-material", "transcript", + ], + "data_availability_required_article_types": [], + } + + def test_valid_sec_type_passes(self): + xml = """ + <article> + <body> + <sec sec-type="methods"> + <title>Methods +

Content.

+
+ + + """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_sec_type_value() + self.assertEqual(result["response"], "OK") + + def test_invalid_sec_type_fails(self): + xml = """ +
+ + + Something +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_sec_type_value() + self.assertEqual(result["response"], "ERROR") + self.assertEqual(result["got_value"], "invalid-type") + + def test_no_sec_type_returns_none(self): + xml = """ +
+ + + Free Section +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_sec_type_value() + self.assertIsNone(result) + + def test_combined_sec_type_valid(self): + xml = """ +
+ + + Materials and Methods +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_sec_type_value() + self.assertEqual(result["response"], "OK") + + +class TestSecValidationTranscriptId(unittest.TestCase): + """Test Rule 3: must have @id.""" + + def setUp(self): + self.params = { + "title_error_level": "CRITICAL", + "sec_type_value_error_level": "ERROR", + "transcript_id_error_level": "ERROR", + "data_availability_error_level": "ERROR", + "combined_format_error_level": "WARNING", + "non_combinable_error_level": "WARNING", + "content_error_level": "WARNING", + "valid_sec_types": [ + "cases", "conclusions", "data-availability", "discussion", + "intro", "materials", "methods", "results", "subjects", + "supplementary-material", "transcript", + ], + "non_combinable_sec_types": [ + "data-availability", "supplementary-material", "transcript", + ], + "data_availability_required_article_types": [], + } + + def test_transcript_with_id_passes(self): + xml = """ +
+ + + Interview Transcript +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_transcript_id() + self.assertEqual(result["response"], "OK") + + def test_transcript_without_id_fails(self): + xml = """ +
+ + + Interview Transcript +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_transcript_id() + self.assertEqual(result["response"], "ERROR") + self.assertIn("Add @id", result["advice"]) + + def test_non_transcript_returns_none(self): + xml = """ +
+ + + Methods +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_transcript_id() + self.assertIsNone(result) + + +class TestSecValidationCombinedFormat(unittest.TestCase): + """Test Rule 5: Combined sec-types must use pipe separator.""" + + def setUp(self): + self.params = { + "title_error_level": "CRITICAL", + "sec_type_value_error_level": "ERROR", + "transcript_id_error_level": "ERROR", + "data_availability_error_level": "ERROR", + "combined_format_error_level": "WARNING", + "non_combinable_error_level": "WARNING", + "content_error_level": "WARNING", + "valid_sec_types": [ + "cases", "conclusions", "data-availability", "discussion", + "intro", "materials", "methods", "results", "subjects", + "supplementary-material", "transcript", + ], + "non_combinable_sec_types": [ + "data-availability", "supplementary-material", "transcript", + ], + "data_availability_required_article_types": [], + } + + def test_pipe_separator_returns_none(self): + xml = """ +
+ + + Materials and Methods +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_combined_format() + self.assertIsNone(result) + + def test_space_separator_fails(self): + xml = """ +
+ + + Materials and Methods +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_combined_format() + self.assertEqual(result["response"], "WARNING") + + def test_comma_separator_fails(self): + xml = """ +
+ + + Materials and Methods +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_combined_format() + self.assertEqual(result["response"], "WARNING") + + def test_single_sec_type_returns_none(self): + xml = """ +
+ + + Methods +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_combined_format() + self.assertIsNone(result) + + +class TestSecValidationNonCombinable(unittest.TestCase): + """Test Rule 6: transcript, supplementary-material, data-availability cannot be combined.""" + + def setUp(self): + self.params = { + "title_error_level": "CRITICAL", + "sec_type_value_error_level": "ERROR", + "transcript_id_error_level": "ERROR", + "data_availability_error_level": "ERROR", + "combined_format_error_level": "WARNING", + "non_combinable_error_level": "WARNING", + "content_error_level": "WARNING", + "valid_sec_types": [ + "cases", "conclusions", "data-availability", "discussion", + "intro", "materials", "methods", "results", "subjects", + "supplementary-material", "transcript", + ], + "non_combinable_sec_types": [ + "data-availability", "supplementary-material", "transcript", + ], + "data_availability_required_article_types": [], + } + + def test_combined_transcript_fails(self): + xml = """ +
+ + + Transcript and Methods +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_non_combinable() + self.assertEqual(result["response"], "WARNING") + + def test_combined_data_availability_fails(self): + xml = """ +
+ + + Data Availability +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_non_combinable() + self.assertEqual(result["response"], "WARNING") + + def test_combined_valid_types_returns_none(self): + xml = """ +
+ + + Materials and Methods +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_non_combinable() + self.assertIsNone(result) + + def test_no_pipe_returns_none(self): + xml = """ +
+ + + Transcript +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_non_combinable() + self.assertIsNone(result) + + +class TestSecValidationContent(unittest.TestCase): + """Test Rule 7: should contain at least one

.""" + + def setUp(self): + self.params = { + "title_error_level": "CRITICAL", + "sec_type_value_error_level": "ERROR", + "transcript_id_error_level": "ERROR", + "data_availability_error_level": "ERROR", + "combined_format_error_level": "WARNING", + "non_combinable_error_level": "WARNING", + "content_error_level": "WARNING", + "valid_sec_types": [ + "cases", "conclusions", "data-availability", "discussion", + "intro", "materials", "methods", "results", "subjects", + "supplementary-material", "transcript", + ], + "non_combinable_sec_types": [ + "data-availability", "supplementary-material", "transcript", + ], + "data_availability_required_article_types": [], + } + + def test_sec_with_paragraph_passes(self): + xml = """ +

+ + + Methods +

Method description.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_content() + self.assertEqual(result["response"], "OK") + + def test_sec_without_paragraph_fails(self): + xml = """ +
+ + + Methods + + +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + validator = SecValidation(secs[0], self.params) + result = validator.validate_content() + self.assertEqual(result["response"], "WARNING") + self.assertIn("Add at least one

", result["advice"]) + + +class TestXMLSecValidationDataAvailability(unittest.TestCase): + """Test Rule 4: data-availability section required for certain article types.""" + + def setUp(self): + self.params = { + "title_error_level": "CRITICAL", + "sec_type_value_error_level": "ERROR", + "transcript_id_error_level": "ERROR", + "data_availability_error_level": "ERROR", + "combined_format_error_level": "WARNING", + "non_combinable_error_level": "WARNING", + "content_error_level": "WARNING", + "valid_sec_types": [ + "cases", "conclusions", "data-availability", "discussion", + "intro", "materials", "methods", "results", "subjects", + "supplementary-material", "transcript", + ], + "non_combinable_sec_types": [ + "data-availability", "supplementary-material", "transcript", + ], + "data_availability_required_article_types": [ + "data-article", "brief-report", "case-report", + "rapid-communication", "research-article", "review-article", + ], + } + + def test_research_article_with_data_availability_passes(self): + xml = """ +

+ + + Introduction +

Intro content.

+
+ + Data Availability +

Data available on request.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + validator = XMLSecValidation(tree, self.params) + results = [r for r in validator.validate_data_availability_presence() + if r is not None] + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + + def test_research_article_without_data_availability_fails(self): + xml = """ +
+ + + Introduction +

Intro content.

+
+ + Methods +

Methods content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + validator = XMLSecValidation(tree, self.params) + results = [r for r in validator.validate_data_availability_presence() + if r is not None] + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "ERROR") + + def test_editorial_no_data_availability_check(self): + xml = """ +
+ + + Editorial +

Editorial content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + validator = XMLSecValidation(tree, self.params) + results = list(validator.validate_data_availability_presence()) + self.assertEqual(len(results), 0) + + def test_case_report_with_data_availability_passes(self): + xml = """ +
+ + + Case Study +

Case content.

+
+ + Data Availability +

Data available.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + validator = XMLSecValidation(tree, self.params) + results = [r for r in validator.validate_data_availability_presence() + if r is not None] + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["response"], "OK") + + +class TestXMLSecValidationIntegration(unittest.TestCase): + """Integration tests for XMLSecValidation.validate().""" + + def setUp(self): + self.params = { + "title_error_level": "CRITICAL", + "sec_type_value_error_level": "ERROR", + "transcript_id_error_level": "ERROR", + "data_availability_error_level": "ERROR", + "combined_format_error_level": "WARNING", + "non_combinable_error_level": "WARNING", + "content_error_level": "WARNING", + "valid_sec_types": [ + "cases", "conclusions", "data-availability", "discussion", + "intro", "materials", "methods", "results", "subjects", + "supplementary-material", "transcript", + ], + "non_combinable_sec_types": [ + "data-availability", "supplementary-material", "transcript", + ], + "data_availability_required_article_types": [ + "data-article", "brief-report", "case-report", + "rapid-communication", "research-article", "review-article", + ], + } + + def test_valid_complete_article(self): + xml = """ +
+ + + Introduction +

Introduction content.

+
+ + Materials and Methods +

Methods content.

+
+ + Results +

Results content.

+
+ + Discussion +

Discussion content.

+
+ + Conclusions +

Conclusions content.

+
+ + Data Availability +

Data available on request.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + validator = XMLSecValidation(tree, self.params) + results = [r for r in validator.validate() if r is not None] + + # All results should be OK + for result in results: + self.assertEqual(result["response"], "OK", f"Failed: {result['title']} - {result.get('advice')}") + + def test_sec_with_subsec(self): + xml = """ +
+ + + Methodology + + Methodology in Science +

Lorem ipsum dolor sit amet.

+
+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + validator = XMLSecValidation(tree, self.params) + results = [r for r in validator.validate() if r is not None] + + # Parent sec has no direct

(only subsec), but subsec has

+ # Filter only "sec content" results + content_results = [r for r in results if r["title"] == "sec content"] + # The parent sec should have WARNING (no direct p), subsec should be OK + parent_content = content_results[0] + self.assertEqual(parent_content["response"], "WARNING") + subsec_content = content_results[1] + self.assertEqual(subsec_content["response"], "OK") + + def test_transcript_sec_without_id_fails(self): + xml = """ +

+ + + Interview +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + validator = XMLSecValidation(tree, self.params) + results = [r for r in validator.validate() if r is not None] + + # Find the transcript_id result + transcript_results = [r for r in results if r["title"] == "transcript id"] + self.assertEqual(len(transcript_results), 1) + self.assertEqual(transcript_results[0]["response"], "ERROR") + + +class TestSecModel(unittest.TestCase): + """Test the ArticleSecs model.""" + + def test_all_secs_returns_all_sections(self): + xml = """ +
+ + + Introduction +

Content.

+
+ + Methods +

Content.

+ + Subsection +

Sub content.

+
+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + secs = list(ArticleSecs(tree).all_secs) + self.assertEqual(len(secs), 3) + + def test_first_level_body_secs(self): + xml = """ +
+ + + Introduction +

Content.

+
+ + Methods +

Content.

+ + Subsection +

Sub content.

+
+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + first_level = list(ArticleSecs(tree).first_level_body_secs) + self.assertEqual(len(first_level), 2) + + def test_body_sec_types(self): + xml = """ +
+ + + Introduction +

Content.

+
+ + Methods +

Content.

+
+ + Free Section +

Content.

+
+ +
+ """ + tree = etree.fromstring(xml.encode()) + sec_types = ArticleSecs(tree).body_sec_types + self.assertEqual(sec_types, ["intro", "methods"]) + + +if __name__ == "__main__": + unittest.main()