From 0d498a3a0bfe3ded6b0d1c3da0f46536976ff5b4 Mon Sep 17 00:00:00 2001 From: Jordan Padams Date: Mon, 30 Mar 2026 13:40:36 -0700 Subject: [PATCH 1/6] Fix #1548: non-existent target incorrectly reports PASS Intercept non-existent file targets in doValidation() before running the validator. Records a MISSING_REFERENCED_FILE error (PRODUCT category) directly to the report so the product shows FAIL, the error is counted in the summary, and the exit code is non-zero. Previously, LocationValidator recorded the error as NO_PRODUCTS_FOUND (ProblemCategory.EXECUTION), which Report.record() explicitly excluded from error counts, causing the product to show PASS with 0 errors in the summary. Co-Authored-By: Claude Sonnet 4.6 --- .../nasa/pds/validate/ValidateLauncher.java | 19 +++++++++++++++++++ src/test/resources/features/4.1.x.feature | 1 + src/test/resources/github1548/.gitkeep | 0 3 files changed, 20 insertions(+) create mode 100644 src/test/resources/github1548/.gitkeep diff --git a/src/main/java/gov/nasa/pds/validate/ValidateLauncher.java b/src/main/java/gov/nasa/pds/validate/ValidateLauncher.java index 3cd2f716e..00b802f72 100644 --- a/src/main/java/gov/nasa/pds/validate/ValidateLauncher.java +++ b/src/main/java/gov/nasa/pds/validate/ValidateLauncher.java @@ -1504,6 +1504,25 @@ public boolean doValidation(Map checksumManifest) throws Exception } LOG.debug("ValidateLauncher:doValidation: validator.validate():target {}", target); + + // Check if explicitly-specified target file exists before running validation. + // If not, record an error directly so the product shows FAIL and the summary + // reflects the error (issue #1548). + File targetFile = FileUtils.toFile(target); + if (targetFile != null && !targetFile.exists()) { + ValidationProblem p = new ValidationProblem( + new ProblemDefinition(ExceptionType.ERROR, ProblemType.MISSING_REFERENCED_FILE, + "Target path does not exist: " + target.getPath()), + target); + try { + report.record(target.toURI(), p); + } catch (URISyntaxException e) { + // Should not happen + } + success = false; + continue; + } + validator.validate(monitor, target); monitor.endValidation(); diff --git a/src/test/resources/features/4.1.x.feature b/src/test/resources/features/4.1.x.feature index 459dc4d27..7b8aaff56 100644 --- a/src/test/resources/features/4.1.x.feature +++ b/src/test/resources/features/4.1.x.feature @@ -9,3 +9,4 @@ Feature: 4.1.x | 956 | 1 | "github956" | "--skip-context-validation -t {datasrc}/" | | | 1458 | 1 | "github1458" | " -t {datasrc}/" | "summary:productValidation:passed=2,summary:productValidation:total=2,summary:totalWarnings=6,messageTypes:warning.label.context_ref_mismatch=5,messageTypes:warning.label.schematron=1" | | 1481 | 1 | "github1481" | "--skip-context-validation --skip-product-validation -R pds4.bundle -t {datasrc}/bundle_test.xml" | "summary:totalWarnings=1,summary:messageTypes:warning.integrity.reference_not_found=1" | +| 1548 | 1 | "github1548" | "--skip-context-validation -t {datasrc}/nonexistent.xml" | "summary:totalErrors=1,summary:productValidation:failed=1,summary:messageTypes:error.label.missing_file=1" | diff --git a/src/test/resources/github1548/.gitkeep b/src/test/resources/github1548/.gitkeep new file mode 100644 index 000000000..e69de29bb From 028a6254bfef84d2a8a272cebb67bab8a50522a5 Mon Sep 17 00:00:00 2001 From: PDSEN CI Bot Date: Wed, 1 Apr 2026 22:21:54 +0000 Subject: [PATCH 2/6] Update changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67df8df9d..76588e0d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## [«unknown»](https://github.com/NASA-PDS/validate/tree/«unknown») (2026-03-30) +## [«unknown»](https://github.com/NASA-PDS/validate/tree/«unknown») (2026-04-01) [Full Changelog](https://github.com/NASA-PDS/validate/compare/v4.0.8...«unknown») @@ -12,6 +12,8 @@ **Defects:** +- Validate does not count cleanly-passing products, incorrectly triggering no\_products\_found error [\#1557](https://github.com/NASA-PDS/validate/issues/1557) [[s.medium](https://github.com/NASA-PDS/validate/labels/s.medium)] +- validate does not fail or report errors when an explicitly-specified target file does not exist [\#1548](https://github.com/NASA-PDS/validate/issues/1548) [[s.medium](https://github.com/NASA-PDS/validate/labels/s.medium)] - error.directory.unallowed\_name not raised on Windows for absolute directory\_path\_name [\#1539](https://github.com/NASA-PDS/validate/issues/1539) [[s.medium](https://github.com/NASA-PDS/validate/labels/s.medium)] - When validate reports a directory has a warning it says "XML could not be parsed." which is misleading. [\#1510](https://github.com/NASA-PDS/validate/issues/1510) [[s.medium](https://github.com/NASA-PDS/validate/labels/s.medium)] - Referential integrity not being checked in all places there is an Internal\_Reference area [\#1481](https://github.com/NASA-PDS/validate/issues/1481) [[s.medium](https://github.com/NASA-PDS/validate/labels/s.medium)] From c0cae5d3c3cf736705e8dc072c54818e9d604812 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 1 Apr 2026 22:40:49 +0000 Subject: [PATCH 3/6] Cache compiled schematron Templates to avoid repeated XSLT compilation Add a HashMap cache to SchematronTransformer so that the expensive ISO schematron XSLT compilation is performed only once per unique schematron source string. Subsequent calls to transform(String) return a new Transformer from the cached Templates object. - Extract compilation logic into private compileSchematron() returning Templates - Add cache lookup in transform(String, ProblemHandler) - Add clearCache() method (naturally reset when LabelValidator.clear() creates a new instance) - Add debug logging for cache hits/misses Fixes: https://github.com/NASA-PDS/validate/issues/1565 Co-Authored-By: jordan.h.padams --- .../tools/label/SchematronTransformer.java | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java b/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java index 212a1421a..d71d0136b 100644 --- a/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java +++ b/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java @@ -20,9 +20,12 @@ import java.io.StringWriter; import java.net.URL; import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.XMLConstants; import javax.xml.transform.Source; +import javax.xml.transform.Templates; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; @@ -44,6 +47,7 @@ */ public class SchematronTransformer { private static final Logger LOG = LoggerFactory.getLogger(SchematronTransformer.class); + private final Map cachedTemplates = new HashMap<>(); /** * Constructor. @@ -98,22 +102,37 @@ public Transformer transform(Source source) throws TransformerException { * @throws TransformerException If an error occurred during the transform process. */ public Transformer transform(Source source, ProblemHandler handler) throws TransformerException { - Transformer isoTransformer = this.buildIsoTransformer(); + return compileSchematron(source, handler).newTransformer(); + } + private Templates compileSchematron(Source source, ProblemHandler handler) throws TransformerException { + Transformer isoTransformer = this.buildIsoTransformer(); if (handler != null) { isoTransformer.setErrorListener(new TransformerErrorListener(handler)); } StringWriter schematronStyleSheet = new StringWriter(); isoTransformer.transform(source, new StreamResult(schematronStyleSheet)); return TransformerFactory.newInstance() - .newTransformer(new StreamSource(new StringReader(schematronStyleSheet.toString()))); + .newTemplates(new StreamSource(new StringReader(schematronStyleSheet.toString()))); } public Transformer transform(String source) throws TransformerException { return this.transform(source, null); } public Transformer transform(String source, ProblemHandler handler) throws TransformerException { - return transform (new StreamSource(new StringReader(source)), handler); + Templates templates = cachedTemplates.get(source); + if (templates == null) { + LOG.debug("transform: cache miss, compiling schematron (length={})", source.length()); + templates = compileSchematron(new StreamSource(new StringReader(source)), handler); + cachedTemplates.put(source, templates); + } else { + LOG.debug("transform: cache hit, reusing compiled schematron (length={})", source.length()); + } + return templates.newTransformer(); + } + + public void clearCache() { + cachedTemplates.clear(); } /** * Transform the given schematron. From 247d0b2eacd19d46a6c6af4aca84b6c3cda7526c Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 1 Apr 2026 23:19:38 +0000 Subject: [PATCH 4/6] Address PR review comments on schematron cache 1. Use ConcurrentHashMap instead of HashMap for thread-safety (#1566, #1567) 2. Key cache on SHA-256 hash of source string to reduce memory overhead 3. Make clearCache() package-private; add cacheSize() for testing 4. Document Source-path cache bypass design decision 5. Add SchematronTransformerTest with cache behavior assertions Co-Authored-By: jordan.h.padams --- .../tools/label/SchematronTransformer.java | 39 ++++++- .../label/SchematronTransformerTest.java | 100 ++++++++++++++++++ 2 files changed, 134 insertions(+), 5 deletions(-) create mode 100644 src/test/java/gov/nasa/pds/tools/label/SchematronTransformerTest.java diff --git a/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java b/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java index d71d0136b..468d61357 100644 --- a/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java +++ b/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java @@ -20,8 +20,10 @@ import java.io.StringWriter; import java.net.URL; import java.nio.charset.StandardCharsets; -import java.util.HashMap; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.XMLConstants; import javax.xml.transform.Source; @@ -47,7 +49,7 @@ */ public class SchematronTransformer { private static final Logger LOG = LoggerFactory.getLogger(SchematronTransformer.class); - private final Map cachedTemplates = new HashMap<>(); + private final Map cachedTemplates = new ConcurrentHashMap<>(); /** * Constructor. @@ -101,6 +103,9 @@ public Transformer transform(Source source) throws TransformerException { * * @throws TransformerException If an error occurred during the transform process. */ + // Not cached — Source objects are not stably comparable/hashable. + // In practice LabelValidator only calls the String-based overload (line 622), + // so this uncached path does not affect bundle-validation performance. public Transformer transform(Source source, ProblemHandler handler) throws TransformerException { return compileSchematron(source, handler).newTransformer(); } @@ -120,20 +125,44 @@ public Transformer transform(String source) throws TransformerException { return this.transform(source, null); } public Transformer transform(String source, ProblemHandler handler) throws TransformerException { - Templates templates = cachedTemplates.get(source); + String key = sha256(source); + Templates templates = cachedTemplates.get(key); if (templates == null) { LOG.debug("transform: cache miss, compiling schematron (length={})", source.length()); templates = compileSchematron(new StreamSource(new StringReader(source)), handler); - cachedTemplates.put(source, templates); + cachedTemplates.put(key, templates); } else { LOG.debug("transform: cache hit, reusing compiled schematron (length={})", source.length()); } return templates.newTransformer(); } - public void clearCache() { + // Package-private: cache lifetime is tied to the SchematronTransformer instance. + // LabelValidator.clear() creates a new instance (line 219), which naturally + // starts with an empty cache. Exposed for testing. + void clearCache() { cachedTemplates.clear(); } + + /** Returns the number of cached templates entries. Package-private for testing. */ + int cacheSize() { + return cachedTemplates.size(); + } + + private static String sha256(String input) { + try { + MessageDigest digest = MessageDigest.getInstance("SHA-256"); + byte[] hash = digest.digest(input.getBytes(StandardCharsets.UTF_8)); + StringBuilder hex = new StringBuilder(hash.length * 2); + for (byte b : hash) { + hex.append(String.format("%02x", b)); + } + return hex.toString(); + } catch (NoSuchAlgorithmException e) { + // SHA-256 is guaranteed to be available on all JVMs + throw new RuntimeException(e); + } + } /** * Transform the given schematron. * diff --git a/src/test/java/gov/nasa/pds/tools/label/SchematronTransformerTest.java b/src/test/java/gov/nasa/pds/tools/label/SchematronTransformerTest.java new file mode 100644 index 000000000..1d7ce11e7 --- /dev/null +++ b/src/test/java/gov/nasa/pds/tools/label/SchematronTransformerTest.java @@ -0,0 +1,100 @@ +package gov.nasa.pds.tools.label; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNotSame; + +import javax.xml.transform.Transformer; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for {@link SchematronTransformer} cache behavior. + */ +public class SchematronTransformerTest { + + /** Minimal valid ISO Schematron document. */ + private static final String MINIMAL_SCHEMATRON = + "\n" + + "\n" + + " Minimal test schematron\n" + + " \n" + + " \n" + + " Always passes.\n" + + " \n" + + " \n" + + ""; + + /** A different schematron to verify distinct cache entries. */ + private static final String OTHER_SCHEMATRON = + "\n" + + "\n" + + " Other test schematron\n" + + " \n" + + " \n" + + " Also passes.\n" + + " \n" + + " \n" + + ""; + + private SchematronTransformer transformer; + + @BeforeEach + void setUp() throws Exception { + transformer = new SchematronTransformer(); + } + + @Test + void testCachePopulatedOnFirstCall() throws Exception { + assertEquals(0, transformer.cacheSize(), "cache should start empty"); + + Transformer t = transformer.transform(MINIMAL_SCHEMATRON); + assertNotNull(t); + assertEquals(1, transformer.cacheSize(), "cache should have one entry after first call"); + } + + @Test + void testCacheHitOnRepeatedCall() throws Exception { + Transformer t1 = transformer.transform(MINIMAL_SCHEMATRON); + assertEquals(1, transformer.cacheSize()); + + Transformer t2 = transformer.transform(MINIMAL_SCHEMATRON); + assertEquals(1, transformer.cacheSize(), "cache size should remain 1 after repeated call"); + + // Each call should return a distinct Transformer (Transformer is not thread-safe) + assertNotSame(t1, t2, "each call should produce a new Transformer instance"); + } + + @Test + void testDistinctSchematronsGetSeparateCacheEntries() throws Exception { + transformer.transform(MINIMAL_SCHEMATRON); + assertEquals(1, transformer.cacheSize()); + + transformer.transform(OTHER_SCHEMATRON); + assertEquals(2, transformer.cacheSize(), "distinct schematrons should produce separate cache entries"); + } + + @Test + void testClearCacheRemovesAllEntries() throws Exception { + transformer.transform(MINIMAL_SCHEMATRON); + transformer.transform(OTHER_SCHEMATRON); + assertEquals(2, transformer.cacheSize()); + + transformer.clearCache(); + assertEquals(0, transformer.cacheSize(), "cache should be empty after clearCache()"); + } + + @Test + void testCacheMissAfterClear() throws Exception { + transformer.transform(MINIMAL_SCHEMATRON); + assertEquals(1, transformer.cacheSize()); + + transformer.clearCache(); + assertEquals(0, transformer.cacheSize()); + + // Re-compile should repopulate the cache + Transformer t = transformer.transform(MINIMAL_SCHEMATRON); + assertNotNull(t); + assertEquals(1, transformer.cacheSize(), "cache should repopulate after clear"); + } +} From 93158afceb1b28b5c647d71799dc3f746650562e Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 2 Apr 2026 00:11:22 +0000 Subject: [PATCH 5/6] Address second round of review comments 1. ThreadLocal instead of per-call getInstance (avoids provider lookup) 2. HexFormat.of().formatHex() instead of String.format loop (Java 17+) 3. Document intentional check-then-act race on ConcurrentHashMap 4. @Tag("integration") and class-level Javadoc noting Saxon-HE dependency on tests Co-Authored-By: jordan.h.padams --- .../tools/label/SchematronTransformer.java | 21 +++++++++++-------- .../label/SchematronTransformerTest.java | 8 ++++++- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java b/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java index 468d61357..3289c0093 100644 --- a/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java +++ b/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java @@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.HexFormat; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import javax.xml.parsers.DocumentBuilderFactory; @@ -126,6 +127,9 @@ public Transformer transform(String source) throws TransformerException { } public Transformer transform(String source, ProblemHandler handler) throws TransformerException { String key = sha256(source); + // Intentional check-then-act: a concurrent miss may compile twice, but + // both results are equivalent and the race is self-healing after warmup. + // computeIfAbsent is avoided because it holds a lock during compilation. Templates templates = cachedTemplates.get(key); if (templates == null) { LOG.debug("transform: cache miss, compiling schematron (length={})", source.length()); @@ -149,19 +153,18 @@ int cacheSize() { return cachedTemplates.size(); } - private static String sha256(String input) { + private static final ThreadLocal SHA256 = ThreadLocal.withInitial(() -> { try { - MessageDigest digest = MessageDigest.getInstance("SHA-256"); - byte[] hash = digest.digest(input.getBytes(StandardCharsets.UTF_8)); - StringBuilder hex = new StringBuilder(hash.length * 2); - for (byte b : hash) { - hex.append(String.format("%02x", b)); - } - return hex.toString(); + return MessageDigest.getInstance("SHA-256"); } catch (NoSuchAlgorithmException e) { - // SHA-256 is guaranteed to be available on all JVMs throw new RuntimeException(e); } + }); + + private static String sha256(String input) { + MessageDigest digest = SHA256.get(); + digest.reset(); + return HexFormat.of().formatHex(digest.digest(input.getBytes(StandardCharsets.UTF_8))); } /** * Transform the given schematron. diff --git a/src/test/java/gov/nasa/pds/tools/label/SchematronTransformerTest.java b/src/test/java/gov/nasa/pds/tools/label/SchematronTransformerTest.java index 1d7ce11e7..cfe5a9f17 100644 --- a/src/test/java/gov/nasa/pds/tools/label/SchematronTransformerTest.java +++ b/src/test/java/gov/nasa/pds/tools/label/SchematronTransformerTest.java @@ -6,11 +6,17 @@ import javax.xml.transform.Transformer; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; /** - * Unit tests for {@link SchematronTransformer} cache behavior. + * Tests for {@link SchematronTransformer} cache behavior. + * + *

These tests exercise the full ISO schematron → XSLT compilation pipeline and + * therefore require Saxon-HE on the classpath. They are slower than pure unit tests + * on first run due to XSLT compilation overhead. */ +@Tag("integration") public class SchematronTransformerTest { /** Minimal valid ISO Schematron document. */ From e84ab06dd74f573ef3cb8fa0db3b591aaeaef54d Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 2 Apr 2026 00:14:20 +0000 Subject: [PATCH 6/6] Add inline comment: handler intentionally unused on cache hit Co-Authored-By: jordan.h.padams --- .../java/gov/nasa/pds/tools/label/SchematronTransformer.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java b/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java index 3289c0093..a0a8ebc8c 100644 --- a/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java +++ b/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java @@ -130,6 +130,8 @@ public Transformer transform(String source, ProblemHandler handler) throws Trans // Intentional check-then-act: a concurrent miss may compile twice, but // both results are equivalent and the race is self-healing after warmup. // computeIfAbsent is avoided because it holds a lock during compilation. + // handler is intentionally unused on cache hit — compilation errors + // would only occur during the first call (cache miss), not on reuse. Templates templates = cachedTemplates.get(key); if (templates == null) { LOG.debug("transform: cache miss, compiling schematron (length={})", source.length());