class ArticleProducts:
    """
    Extract ``<product>`` elements from an XML article document.

    Only products matched by ``.//front/article-meta/product`` (relative to
    the node given to the constructor) are reported.
    """

    def __init__(self, xmltree):
        """
        Store the XML tree to be inspected.

        Parameters
        ----------
        xmltree : lxml.etree._Element
            The root element of the XML document
        """
        self.xmltree = xmltree

    @classmethod
    def _full_text(cls, node):
        """Return the text of ``node`` including the text of nested elements."""
        chunks = [node.text or ""]
        for child in node:
            # Nodes whose tag is not a string (comments and processing
            # instructions in lxml) contribute only their tail text.
            if isinstance(child.tag, str):
                chunks.append(cls._full_text(child))
            chunks.append(child.tail or "")
        return "".join(chunks)

    @classmethod
    def _child_text(cls, parent, tag):
        """Return the stripped text of the first ``tag`` child, or None if absent."""
        node = parent.find(tag)
        if node is None:
            return None
        # Using only ``node.text`` would drop everything after the first
        # inline child (e.g. ``<source>La <italic>x</italic></source>``).
        return cls._full_text(node).strip()

    @property
    def article_type(self):
        """Return the ``article-type`` attribute of the tree's root node."""
        return self.xmltree.get("article-type")

    @property
    def article_lang(self):
        """Return the ``xml:lang`` attribute of the tree's root node."""
        return self.xmltree.get("{http://www.w3.org/XML/1998/namespace}lang")

    @property
    def products(self):
        """
        Iterate over the ``<product>`` elements found in ``article-meta``.

        Yields
        ------
        dict
            One dictionary per product:
            - product_type: value of the ``product-type`` attribute (or None)
            - source: stripped text of ``source`` (None when absent)
            - has_author: True when a ``person-group`` child has
              ``person-group-type="author"``
            - has_publisher_name: True when ``publisher-name`` has text
            - has_year: True when ``year`` has text
            - person_groups: ``person-group-type`` of each ``person-group``
            - isbn: stripped text of ``isbn`` (None when absent)
            - publisher_loc: stripped text of ``publisher-loc`` (None when absent)
            - size: stripped text of ``size`` (None when absent)
            - parent: always "article"
            - parent_id: always None
            - parent_article_type: ``article-type`` of the root node
            - parent_lang: ``xml:lang`` of the root node
        """
        article_type = self.article_type
        article_lang = self.article_lang

        for product in self.xmltree.xpath(".//front/article-meta/product"):
            person_groups = [
                group.get("person-group-type")
                for group in product.findall("person-group")
            ]

            yield {
                "product_type": product.get("product-type"),
                "source": self._child_text(product, "source"),
                "has_author": "author" in person_groups,
                "has_publisher_name": bool(
                    self._child_text(product, "publisher-name")
                ),
                "has_year": bool(self._child_text(product, "year")),
                "person_groups": person_groups,
                "isbn": self._child_text(product, "isbn"),
                "publisher_loc": self._child_text(product, "publisher-loc"),
                "size": self._child_text(product, "size"),
                "parent": "article",
                "parent_id": None,
                "parent_article_type": article_type,
                "parent_lang": article_lang,
            }
class ProductValidation:
    """
    Validate a single ``<product>`` element.

    Parameters
    ----------
    data : dict
        Product data dictionary from ArticleProducts.products
    rules : dict
        Validation rules with error levels
    """

    def __init__(self, data, rules):
        self.data = data
        self.rules = rules

    @property
    def parent(self):
        """Return the parent context dict passed to build_response."""
        return {
            "parent": self.data.get("parent"),
            "parent_id": self.data.get("parent_id"),
            "parent_article_type": self.data.get("parent_article_type"),
            "parent_lang": self.data.get("parent_lang"),
        }

    def _respond(
        self,
        *,
        title,
        sub_item,
        validation_type,
        is_valid,
        expected,
        obtained,
        error_level,
        advice_text,
        advice_params=None,
    ):
        """Call build_response with the arguments shared by every check."""
        advice_params = advice_params or {}
        # Templates without parameters are passed through untouched.
        advice = advice_text.format(**advice_params) if advice_params else advice_text
        return build_response(
            title=title,
            parent=self.parent,
            item="product",
            sub_item=sub_item,
            validation_type=validation_type,
            is_valid=is_valid,
            expected=expected,
            obtained=obtained,
            advice=advice,
            data=self.data,
            error_level=error_level,
            advice_text=advice_text,
            advice_params=advice_params,
        )

    def validate_product_type_presence(self):
        """
        Check that @product-type is present and not blank.

        The error level is read from ``product_type_presence_error_level``
        (default "CRITICAL").

        Returns
        -------
        dict
            Validation result
        """
        product_type = self.data.get("product_type")
        return self._respond(
            title="@product-type attribute",
            sub_item="@product-type",
            validation_type="exist",
            is_valid=bool(product_type and product_type.strip()),
            expected='@product-type attribute present with value "book"',
            obtained=product_type,
            error_level=self.rules.get(
                "product_type_presence_error_level", "CRITICAL"
            ),
            advice_text=_(
                "Add @product-type attribute to <product>."
                ' Expected value: "book"'
            ),
        )

    def validate_product_type_value(self):
        """
        Check that @product-type is one of the allowed values.

        Allowed values come from ``product_type_list`` (default ``["book"]``);
        the error level from ``product_type_value_error_level`` (default
        "ERROR").

        Returns
        -------
        dict or None
            Validation result, or None when @product-type is absent or blank
        """
        product_type = self.data.get("product_type")
        # Absence is reported by validate_product_type_presence.
        if not product_type or not product_type.strip():
            return None

        expected_values = self.rules.get("product_type_list", ["book"])
        allowed = ", ".join(expected_values)
        return self._respond(
            title="@product-type value",
            sub_item="@product-type",
            validation_type="value in list",
            is_valid=product_type in expected_values,
            expected=allowed,
            obtained=product_type,
            error_level=self.rules.get("product_type_value_error_level", "ERROR"),
            advice_text=_(
                'Replace @product-type="{product_type}" with "book".'
                " Valid values: {allowed_values}"
            ),
            advice_params={
                "product_type": product_type,
                "allowed_values": allowed,
            },
        )

    def validate_source_presence(self):
        """
        Check that ``source`` is present with non-blank text.

        The error level is read from ``source_presence_error_level``
        (default "CRITICAL").

        Returns
        -------
        dict
            Validation result
        """
        source = self.data.get("source")
        return self._respond(
            title="source element",
            sub_item="source",
            validation_type="exist",
            is_valid=source is not None and bool(source.strip()),
            expected="<source> element with book title",
            obtained=source,
            error_level=self.rules.get("source_presence_error_level", "CRITICAL"),
            advice_text=_(
                "Add <source> element with the book title inside <product>"
            ),
        )

    def validate_article_type_consistency(self):
        """
        Check that the parent article has @article-type="book-review".

        The error level is read from ``article_type_consistency_error_level``
        (default "ERROR").

        Returns
        -------
        dict
            Validation result
        """
        article_type = self.data.get("parent_article_type")
        return self._respond(
            title="article-type consistency",
            sub_item="@article-type",
            validation_type="value",
            is_valid=article_type == "book-review",
            expected="book-review",
            obtained=article_type,
            error_level=self.rules.get(
                "article_type_consistency_error_level", "ERROR"
            ),
            advice_text=_(
                '<product> is present but @article-type="{article_type}".'
                ' For book reviews, use @article-type="book-review" in <article>'
            ),
            advice_params={"article_type": article_type},
        )

    def validate_author_presence(self):
        """
        Check the ``has_author`` flag of the product data.

        The error level is read from ``author_presence_error_level``
        (default "WARNING").

        Returns
        -------
        dict
            Validation result
        """
        has_author = self.data.get("has_author", False)
        return self._respond(
            title="author in product",
            sub_item="person-group",
            validation_type="exist",
            is_valid=has_author,
            expected='<person-group person-group-type="author">',
            obtained=str(has_author),
            error_level=self.rules.get("author_presence_error_level", "WARNING"),
            advice_text=_(
                'Add <person-group person-group-type="author"> with the'
                " author(s) of the reviewed book inside <product>"
            ),
        )

    def validate_publisher_name_presence(self):
        """
        Check the ``has_publisher_name`` flag of the product data.

        The error level is read from ``publisher_name_presence_error_level``
        (default "WARNING").

        Returns
        -------
        dict
            Validation result
        """
        has_publisher = self.data.get("has_publisher_name", False)
        return self._respond(
            title="publisher-name in product",
            sub_item="publisher-name",
            validation_type="exist",
            is_valid=has_publisher,
            expected="<publisher-name>",
            obtained=str(has_publisher),
            error_level=self.rules.get(
                "publisher_name_presence_error_level", "WARNING"
            ),
            advice_text=_(
                "Add <publisher-name> element with the publisher name"
                " inside <product> for bibliographic completeness"
            ),
        )

    def validate_year_presence(self):
        """
        Check the ``has_year`` flag of the product data.

        The error level is read from ``year_presence_error_level``
        (default "WARNING").

        Returns
        -------
        dict
            Validation result
        """
        has_year = self.data.get("has_year", False)
        return self._respond(
            title="year in product",
            sub_item="year",
            validation_type="exist",
            is_valid=has_year,
            expected="<year>",
            obtained=str(has_year),
            error_level=self.rules.get("year_presence_error_level", "WARNING"),
            advice_text=_(
                "Add <year> element with the publication year"
                " inside <product> for bibliographic completeness"
            ),
        )

    def validate(self):
        """
        Run every product check in a fixed order.

        Returns
        -------
        list
            Validation results; falsy results (e.g. None) are dropped
        """
        checks = (
            self.validate_product_type_presence,
            self.validate_product_type_value,
            self.validate_source_presence,
            self.validate_article_type_consistency,
            self.validate_author_presence,
            self.validate_publisher_name_presence,
            self.validate_year_presence,
        )
        return [response for check in checks if (response := check())]


class ArticleProductValidation:
    """
    Validate every ``<product>`` element of an XML article.

    Parameters
    ----------
    xmltree : lxml.etree._Element
        The root element of the XML document
    rules : dict
        Validation rules with error levels

    Raises
    ------
    ValueError
        If ``xmltree`` has no ``get`` attribute or ``rules`` is not a dict
    """

    def __init__(self, xmltree, rules):
        if not hasattr(xmltree, "get"):
            raise ValueError("xmltree must be a valid XML object.")
        if not isinstance(rules, dict):
            raise ValueError("rules must be a dictionary containing error levels.")

        self.xmltree = xmltree
        self.rules = rules
        self.products_model = ArticleProducts(xmltree)

    def validate(self):
        """
        Validate all product elements.

        Yields
        ------
        dict
            Validation results for each product element
        """
        # The model's generator is consumed directly; no intermediate list.
        for product_data in self.products_model.products:
            yield from ProductValidation(product_data, self.rules).validate()
def validate_products(xmltree, params):
    """
    Yield the validation results for the article's ``<product>`` elements.

    The rules are taken from ``params["product_rules"]`` and handed, together
    with ``xmltree``, to ArticleProductValidation, whose results are yielded
    unchanged. Being a generator, nothing runs until it is first iterated.
    """
    rules = params["product_rules"]
    yield from ArticleProductValidation(xmltree, rules).validate()
+ +Tests cover: +- @product-type attribute presence and value +- element presence +- Consistency with @article-type="book-review" +- Recommended elements (author, publisher-name, year) +- Multiple products +- Edge cases +""" +import unittest +from lxml import etree + +from packtools.sps.validation.product import ProductValidation, ArticleProductValidation + + +def filter_results(results): + """Filter out None values from validator results.""" + return [r for r in results if r is not None] + + +class TestProductTypePresence(unittest.TestCase): + """Tests for @product-type attribute presence (CRITICAL).""" + + def _get_validator(self, xml_content, rules=None): + xmltree = etree.fromstring(xml_content) + rules = rules or {"product_type_presence_error_level": "CRITICAL"} + validator = ArticleProductValidation(xmltree, rules) + products = list(validator.products_model.products) + if products: + return ProductValidation(products[0], rules) + return None + + def test_product_type_book_ok(self): + """ with @product-type="book" should be OK.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_product_type_presence() + self.assertEqual(result["response"], "OK") + + def test_product_type_missing_critical(self): + """ without @product-type should be CRITICAL.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_product_type_presence() + self.assertEqual(result["response"], "CRITICAL") + self.assertIsNotNone(result["advice"]) + self.assertIsNotNone(result["adv_text"]) + + def test_product_type_empty_critical(self): + """ with empty @product-type should be CRITICAL.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_product_type_presence() + self.assertEqual(result["response"], "CRITICAL") + + def test_product_type_spaces_only_critical(self): + """ with @product-type containing only spaces should be CRITICAL.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_product_type_presence() + self.assertEqual(result["response"], "CRITICAL") + + +class TestProductTypeValue(unittest.TestCase): + """Tests for @product-type value validation (ERROR).""" + + def _get_validator(self, xml_content, rules=None): + xmltree = etree.fromstring(xml_content) + rules = rules or { + "product_type_value_error_level": "ERROR", + "product_type_list": ["book"], + } + validator = ArticleProductValidation(xmltree, rules) + products = list(validator.products_model.products) + if products: + return ProductValidation(products[0], rules) + return None + + def test_product_type_book_ok(self): + """@product-type="book" should be OK.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_product_type_value() + self.assertEqual(result["response"], "OK") + + def test_product_type_other_error(self): + """@product-type="other" should be ERROR.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_product_type_value() + self.assertEqual(result["response"], "ERROR") + self.assertIn("other", result["advice"]) + + def test_product_type_journal_error(self): + """@product-type="journal" should be ERROR.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_product_type_value() + self.assertEqual(result["response"], "ERROR") + + def test_product_type_uppercase_book_error(self): + """@product-type="Book" (uppercase) should be ERROR.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_product_type_value() + self.assertEqual(result["response"], "ERROR") + + def test_product_type_all_uppercase_book_error(self): + """@product-type="BOOK" (all uppercase) should be ERROR.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_product_type_value() + self.assertEqual(result["response"], "ERROR") + + def test_product_type_absent_returns_none(self): + """Missing @product-type should return None (handled by presence check).""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_product_type_value() + self.assertIsNone(result) + + def test_product_type_empty_returns_none(self): + """Empty @product-type should return None (handled by presence check).""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_product_type_value() + self.assertIsNone(result) + + +class TestSourcePresence(unittest.TestCase): + """Tests for element presence (CRITICAL).""" + + def _get_validator(self, xml_content, rules=None): + xmltree = etree.fromstring(xml_content) + rules = rules or {"source_presence_error_level": "CRITICAL"} + validator = ArticleProductValidation(xmltree, rules) + products = list(validator.products_model.products) + if products: + return ProductValidation(products[0], rules) + return None + + def test_source_present_ok(self): + """ with should be OK.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_source_presence() + self.assertEqual(result["response"], "OK") + + def test_source_with_special_characters_ok(self): + """ with special characters should be OK.""" + xml = """ +
+ + + + La comunidad filosófica: manifiesto por una universidad popular + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_source_presence() + self.assertEqual(result["response"], "OK") + + def test_source_missing_critical(self): + """ without should be CRITICAL.""" + xml = """ +
+ + + + Oxford University Press + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_source_presence() + self.assertEqual(result["response"], "CRITICAL") + + def test_source_empty_critical(self): + """ with empty content should be CRITICAL.""" + xml = """ +
+ + + + + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_source_presence() + self.assertEqual(result["response"], "CRITICAL") + + def test_source_spaces_only_critical(self): + """ with only spaces should be CRITICAL.""" + xml = """ +
+ + + + + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_source_presence() + self.assertEqual(result["response"], "CRITICAL") + + def test_product_empty_critical(self): + """Empty should have CRITICAL for missing source.""" + xml = """ +
+ + + + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_source_presence() + self.assertEqual(result["response"], "CRITICAL") + + +class TestArticleTypeConsistency(unittest.TestCase): + """Tests for article-type consistency (ERROR).""" + + def _get_validator(self, xml_content, rules=None): + xmltree = etree.fromstring(xml_content) + rules = rules or {"article_type_consistency_error_level": "ERROR"} + validator = ArticleProductValidation(xmltree, rules) + products = list(validator.products_model.products) + if products: + return ProductValidation(products[0], rules) + return None + + def test_book_review_article_type_ok(self): + """ with article-type="book-review" should be OK.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_article_type_consistency() + self.assertEqual(result["response"], "OK") + + def test_research_article_type_error(self): + """ with article-type="research-article" should be ERROR.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_article_type_consistency() + self.assertEqual(result["response"], "ERROR") + self.assertIn("research-article", result["advice"]) + + def test_review_article_type_error(self): + """ with article-type="review-article" should be ERROR.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_article_type_consistency() + self.assertEqual(result["response"], "ERROR") + + def test_no_article_type_error(self): + """ without article-type in
should be ERROR.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_article_type_consistency() + self.assertEqual(result["response"], "ERROR") + + +class TestAuthorPresence(unittest.TestCase): + """Tests for author person-group presence (WARNING).""" + + def _get_validator(self, xml_content, rules=None): + xmltree = etree.fromstring(xml_content) + rules = rules or {"author_presence_error_level": "WARNING"} + validator = ArticleProductValidation(xmltree, rules) + products = list(validator.products_model.products) + if products: + return ProductValidation(products[0], rules) + return None + + def test_author_present_ok(self): + """ with author person-group should be OK.""" + xml = """ +
+ + + + + + Smith + John + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_author_presence() + self.assertEqual(result["response"], "OK") + + def test_author_missing_warning(self): + """ without author person-group should be WARNING.""" + xml = """ +
+ + + + Book Title + Publisher + 2020 + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_author_presence() + self.assertEqual(result["response"], "WARNING") + + def test_multiple_authors_ok(self): + """ with multiple authors should be OK.""" + xml = """ +
+ + + + + + Silva + João + + + Santos + Maria + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_author_presence() + self.assertEqual(result["response"], "OK") + + def test_editor_only_warning(self): + """ with only editor person-group (no author) should be WARNING.""" + xml = """ +
+ + + + + + Oliveira + Carlos + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_author_presence() + self.assertEqual(result["response"], "WARNING") + + def test_translator_only_warning(self): + """ with only translator person-group (no author) should be WARNING.""" + xml = """ +
+ + + + + + Castro + Antonia + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_author_presence() + self.assertEqual(result["response"], "WARNING") + + +class TestPublisherNamePresence(unittest.TestCase): + """Tests for presence (WARNING).""" + + def _get_validator(self, xml_content, rules=None): + xmltree = etree.fromstring(xml_content) + rules = rules or {"publisher_name_presence_error_level": "WARNING"} + validator = ArticleProductValidation(xmltree, rules) + products = list(validator.products_model.products) + if products: + return ProductValidation(products[0], rules) + return None + + def test_publisher_name_present_ok(self): + """ with should be OK.""" + xml = """ +
+ + + + Book Title + Oxford University Press + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_publisher_name_presence() + self.assertEqual(result["response"], "OK") + + def test_publisher_name_missing_warning(self): + """ without should be WARNING.""" + xml = """ +
+ + + + + + Smith + John + + + Book Title + 2020 + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_publisher_name_presence() + self.assertEqual(result["response"], "WARNING") + + +class TestYearPresence(unittest.TestCase): + """Tests for presence (WARNING).""" + + def _get_validator(self, xml_content, rules=None): + xmltree = etree.fromstring(xml_content) + rules = rules or {"year_presence_error_level": "WARNING"} + validator = ArticleProductValidation(xmltree, rules) + products = list(validator.products_model.products) + if products: + return ProductValidation(products[0], rules) + return None + + def test_year_present_ok(self): + """ with should be OK.""" + xml = """ +
+ + + + Book Title + 2020 + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_year_presence() + self.assertEqual(result["response"], "OK") + + def test_year_missing_warning(self): + """ without should be WARNING.""" + xml = """ +
+ + + + + + Smith + John + + + Book Title + Publisher + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_year_presence() + self.assertEqual(result["response"], "WARNING") + + +class TestProductValidateAll(unittest.TestCase): + """Tests for the validate() method that runs all validations.""" + + def _get_results(self, xml_content, rules=None): + xmltree = etree.fromstring(xml_content) + rules = rules or { + "product_type_presence_error_level": "CRITICAL", + "product_type_value_error_level": "ERROR", + "source_presence_error_level": "CRITICAL", + "article_type_consistency_error_level": "ERROR", + "author_presence_error_level": "WARNING", + "publisher_name_presence_error_level": "WARNING", + "year_presence_error_level": "WARNING", + "product_type_list": ["book"], + } + validator = ArticleProductValidation(xmltree, rules) + return filter_results(validator.validate()) + + def test_complete_product_all_ok(self): + """Complete product with all elements should have all OK.""" + xml = """ +
+ + + + + + ONFRAY + Michel + + + La comunidad filosófica + Gedisa + Barcelona + 2008 + 155 + 978-84-9784-252-5 + + + +
+ """ + results = self._get_results(xml) + for result in results: + self.assertEqual(result["response"], "OK", f"Failed for: {result['title']}") + + def test_minimal_product_with_warnings(self): + """Minimal product (only source) should have warnings for missing recommended.""" + xml = """ +
+ + + + Historia de la Filosofía Moderna + + + +
+ """ + results = self._get_results(xml) + responses = {r["title"]: r["response"] for r in results} + + self.assertEqual(responses["@product-type attribute"], "OK") + self.assertEqual(responses["@product-type value"], "OK") + self.assertEqual(responses["source element"], "OK") + self.assertEqual(responses["article-type consistency"], "OK") + self.assertEqual(responses["author in product"], "WARNING") + self.assertEqual(responses["publisher-name in product"], "WARNING") + self.assertEqual(responses["year in product"], "WARNING") + + def test_no_product_no_results(self): + """Article without should yield no results.""" + xml = """ +
+ + + + Research Paper + + + +
+ """ + results = self._get_results(xml) + self.assertEqual(len(results), 0) + + def test_product_without_product_type_has_critical(self): + """Product without @product-type should have CRITICAL.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + results = self._get_results(xml) + type_results = [r for r in results if r["title"] == "@product-type attribute"] + self.assertEqual(len(type_results), 1) + self.assertEqual(type_results[0]["response"], "CRITICAL") + + def test_wrong_article_type_has_error(self): + """Product with wrong article-type should have ERROR.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + results = self._get_results(xml) + consistency_results = [r for r in results if r["title"] == "article-type consistency"] + self.assertEqual(len(consistency_results), 1) + self.assertEqual(consistency_results[0]["response"], "ERROR") + + +class TestMultipleProducts(unittest.TestCase): + """Tests for articles with multiple <product> elements.""" + + def _get_results(self, xml_content, rules=None): + xmltree = etree.fromstring(xml_content) + rules = rules or { + "product_type_presence_error_level": "CRITICAL", + "product_type_value_error_level": "ERROR", + "source_presence_error_level": "CRITICAL", + "article_type_consistency_error_level": "ERROR", + "author_presence_error_level": "WARNING", + "publisher_name_presence_error_level": "WARNING", + "year_presence_error_level": "WARNING", + "product_type_list": ["book"], + } + validator = ArticleProductValidation(xmltree, rules) + return filter_results(validator.validate()) + + def test_two_products_ok(self): + """Article with two valid products should yield results for both.""" + xml = """ +
+ + + + + + Smith + John + + + Introduction to Philosophy + Oxford University Press + 2019 + + + + + Jones + Mary + + + Advanced Philosophy + Cambridge University Press + 2020 + + + +
+ """ + results = self._get_results(xml) + # Each product yields 7 validations, all OK + self.assertEqual(len(results), 14) + for result in results: + self.assertEqual(result["response"], "OK", f"Failed for: {result['title']}") + + def test_three_products_ok(self): + """Article with three products should yield results for all three.""" + xml = """ +
+ + + + Book One + + + Book Two + + + Book Three + + + +
+ """ + results = self._get_results(xml) + # 7 validations per product × 3 products = 21 + self.assertEqual(len(results), 21) + + +class TestPersonGroupTypes(unittest.TestCase): + """Tests for person-group types within <product>.""" + + def _get_validator(self, xml_content, rules=None): + xmltree = etree.fromstring(xml_content) + rules = rules or {"author_presence_error_level": "WARNING"} + validator = ArticleProductValidation(xmltree, rules) + products = list(validator.products_model.products) + if products: + return ProductValidation(products[0], rules) + return None + + def test_author_person_group_ok(self): + """<person-group person-group-type="author"> should satisfy author check.""" + xml = """ +
+ + + + + + Smith + John + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_author_presence() + self.assertEqual(result["response"], "OK") + + def test_editor_person_group_not_author(self): + """<person-group person-group-type="editor"> should not satisfy author check.""" + xml = """ +
+ + + + + + Oliveira + Carlos + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_author_presence() + self.assertEqual(result["response"], "WARNING") + + def test_author_and_translator_ok(self): + """Author and translator person-groups together should be OK.""" + xml = """ +
+ + + + + + ONFRAY + Michel + + + + + Castro + Antonia + + + Book Title + + + +
+ """ + validator = self._get_validator(xml) + result = validator.validate_author_presence() + self.assertEqual(result["response"], "OK") + + +class TestOptionalElements(unittest.TestCase): + """Tests for optional elements in <product>.""" + + def _get_model_products(self, xml_content): + xmltree = etree.fromstring(xml_content) + model = ArticleProductValidation(xmltree, {}) + return list(model.products_model.products) + + def test_product_with_isbn(self): + """Product with ISBN should be extracted.""" + xml = """ +
+ + + + Book Title + 978-84-9784-252-5 + + + +
+ """ + products = self._get_model_products(xml) + self.assertEqual(len(products), 1) + self.assertEqual(products[0]["isbn"], "978-84-9784-252-5") + + def test_product_with_publisher_loc(self): + """Product with publisher-loc should be extracted.""" + xml = """ +
+ + + + Book Title + Barcelona + + + +
+ """ + products = self._get_model_products(xml) + self.assertEqual(len(products), 1) + self.assertEqual(products[0]["publisher_loc"], "Barcelona") + + def test_product_with_size(self): + """Product with size should be extracted.""" + xml = """ +
+ + + + Book Title + 155 + + + +
+ """ + products = self._get_model_products(xml) + self.assertEqual(len(products), 1) + self.assertEqual(products[0]["size"], "155") + + def test_product_without_optional_elements(self): + """Product without optional elements should have None values.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + products = self._get_model_products(xml) + self.assertEqual(len(products), 1) + self.assertIsNone(products[0]["isbn"]) + self.assertIsNone(products[0]["publisher_loc"]) + self.assertIsNone(products[0]["size"]) + + +class TestArticleProductModel(unittest.TestCase): + """Tests for the ArticleProducts model.""" + + def test_no_products(self): + """Article without <product> should yield no products.""" + xml = """ +
+ + + + Title + + + +
+ """ + from packtools.sps.models.product import ArticleProducts + xmltree = etree.fromstring(xml) + model = ArticleProducts(xmltree) + products = list(model.products) + self.assertEqual(len(products), 0) + + def test_one_product(self): + """Article with one <product> should yield one product.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + from packtools.sps.models.product import ArticleProducts + xmltree = etree.fromstring(xml) + model = ArticleProducts(xmltree) + products = list(model.products) + self.assertEqual(len(products), 1) + self.assertEqual(products[0]["product_type"], "book") + self.assertEqual(products[0]["source"], "Book Title") + self.assertEqual(products[0]["parent"], "article") + self.assertEqual(products[0]["parent_article_type"], "book-review") + self.assertEqual(products[0]["parent_lang"], "en") + + def test_product_person_groups(self): + """Product with multiple person-groups should report all types.""" + xml = """ +
+ + + + + + Author + First + + + + + Translator + First + + + Title + + + +
+ """ + from packtools.sps.models.product import ArticleProducts + xmltree = etree.fromstring(xml) + model = ArticleProducts(xmltree) + products = list(model.products) + self.assertEqual(len(products), 1) + self.assertIn("author", products[0]["person_groups"]) + self.assertIn("translator", products[0]["person_groups"]) + self.assertTrue(products[0]["has_author"]) + + +class TestResponseStructure(unittest.TestCase): + """Tests for the validation response structure (i18n fields).""" + + def test_response_has_i18n_fields(self): + """All responses should have msg_text, msg_params, adv_text, adv_params.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + xmltree = etree.fromstring(xml) + rules = { + "product_type_presence_error_level": "CRITICAL", + "product_type_value_error_level": "ERROR", + "source_presence_error_level": "CRITICAL", + "article_type_consistency_error_level": "ERROR", + "author_presence_error_level": "WARNING", + "publisher_name_presence_error_level": "WARNING", + "year_presence_error_level": "WARNING", + "product_type_list": ["book"], + } + validator = ArticleProductValidation(xmltree, rules) + results = filter_results(validator.validate()) + + expected_keys = { + "title", "parent", "parent_id", "parent_article_type", + "parent_lang", "item", "sub_item", "validation_type", + "response", "expected_value", "got_value", "message", + "msg_text", "msg_params", "advice", "adv_text", + "adv_params", "data", + } + + for result in results: + self.assertTrue( + expected_keys.issubset(result.keys()), + f"Missing keys in response for '{result.get('title', 'unknown')}': " + f"{expected_keys - result.keys()}" + ) + self.assertIn("msg_text", result) + self.assertIn("msg_params", result) + + def test_error_response_has_advice(self): + """Error responses should have non-None advice and adv_text.""" + xml = """ +
+ + + + Book Title + + + +
+ """ + xmltree = etree.fromstring(xml) + rules = { + "product_type_presence_error_level": "CRITICAL", + "product_type_value_error_level": "ERROR", + "source_presence_error_level": "CRITICAL", + "article_type_consistency_error_level": "ERROR", + "author_presence_error_level": "WARNING", + "publisher_name_presence_error_level": "WARNING", + "year_presence_error_level": "WARNING", + "product_type_list": ["book"], + } + validator = ArticleProductValidation(xmltree, rules) + results = filter_results(validator.validate()) + + error_results = [r for r in results if r["response"] != "OK"] + self.assertTrue(len(error_results) > 0) + for result in error_results: + self.assertIsNotNone(result["advice"], f"No advice for {result['title']}") + self.assertIsNotNone(result["adv_text"], f"No adv_text for {result['title']}") + + +class TestEdgeCases(unittest.TestCase): + """Tests for edge cases.""" + + def _get_results(self, xml_content, rules=None): + xmltree = etree.fromstring(xml_content) + rules = rules or { + "product_type_presence_error_level": "CRITICAL", + "product_type_value_error_level": "ERROR", + "source_presence_error_level": "CRITICAL", + "article_type_consistency_error_level": "ERROR", + "author_presence_error_level": "WARNING", + "publisher_name_presence_error_level": "WARNING", + "year_presence_error_level": "WARNING", + "product_type_list": ["book"], + } + validator = ArticleProductValidation(xmltree, rules) + return filter_results(validator.validate()) + + def test_source_with_subtitle_ok(self): + """Source with colon/subtitle should be OK.""" + xml = """ +
+ + + + Main Title: A Subtitle + + + +
+ """ + results = self._get_results(xml) + source_results = [r for r in results if r["title"] == "source element"] + self.assertEqual(source_results[0]["response"], "OK") + + def test_year_with_four_digits_ok(self): + """Year with 4 digits should be OK.""" + xml = """ +
+ + + + Book Title + 2020 + + + +
+ """ + results = self._get_results(xml) + year_results = [r for r in results if r["title"] == "year in product"] + self.assertEqual(year_results[0]["response"], "OK") + + def test_isbn_format_not_validated(self): + """ISBN format should not be validated (out of scope).""" + xml = """ +
+ + + + Book Title + invalid-isbn + + + +
+ """ + # Should not fail - ISBN format validation is out of scope + results = self._get_results(xml) + # No ISBN validation result should exist + isbn_results = [r for r in results if "isbn" in r.get("title", "").lower()] + self.assertEqual(len(isbn_results), 0) + + def test_product_with_editor_as_organizer(self): + """Product with editor person-group (organizer) should be valid.""" + xml = """ +
+ + + + + + Oliveira + Carlos + + + Coletânea de Artigos sobre Educação + Rio de Janeiro + Fundação Getúlio Vargas + 2021 + + + +
+ """ + results = self._get_results(xml) + responses = {r["title"]: r["response"] for r in results} + # Editor is not "author", so author validation should warn + self.assertEqual(responses["author in product"], "WARNING") + # All others should be OK + self.assertEqual(responses["@product-type attribute"], "OK") + self.assertEqual(responses["@product-type value"], "OK") + self.assertEqual(responses["source element"], "OK") + self.assertEqual(responses["publisher-name in product"], "OK") + self.assertEqual(responses["year in product"], "OK") + + +if __name__ == "__main__": + unittest.main()