diff --git a/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java b/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java index d71d0136b..a0a8ebc8c 100644 --- a/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java +++ b/src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java @@ -20,8 +20,11 @@ import java.io.StringWriter; import java.net.URL; import java.nio.charset.StandardCharsets; -import java.util.HashMap; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.HexFormat; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.XMLConstants; import javax.xml.transform.Source; @@ -47,7 +50,7 @@ */ public class SchematronTransformer { private static final Logger LOG = LoggerFactory.getLogger(SchematronTransformer.class); - private final Map cachedTemplates = new HashMap<>(); + private final Map cachedTemplates = new ConcurrentHashMap<>(); /** * Constructor. @@ -101,6 +104,9 @@ public Transformer transform(Source source) throws TransformerException { * * @throws TransformerException If an error occurred during the transform process. */ + // Not cached — Source objects are not stably comparable/hashable. + // In practice LabelValidator only calls the String-based overload (line 622), + // so this uncached path does not affect bundle-validation performance. public Transformer transform(Source source, ProblemHandler handler) throws TransformerException { return compileSchematron(source, handler).newTransformer(); } @@ -120,20 +126,48 @@ public Transformer transform(String source) throws TransformerException { return this.transform(source, null); } public Transformer transform(String source, ProblemHandler handler) throws TransformerException { - Templates templates = cachedTemplates.get(source); + String key = sha256(source); + // Intentional check-then-act: a concurrent miss may compile twice, but + // both results are equivalent and the race is self-healing after warmup. + // computeIfAbsent is avoided because it holds a lock during compilation. + // handler is intentionally unused on cache hit — compilation errors + // would only occur during the first call (cache miss), not on reuse. + Templates templates = cachedTemplates.get(key); if (templates == null) { LOG.debug("transform: cache miss, compiling schematron (length={})", source.length()); templates = compileSchematron(new StreamSource(new StringReader(source)), handler); - cachedTemplates.put(source, templates); + cachedTemplates.put(key, templates); } else { LOG.debug("transform: cache hit, reusing compiled schematron (length={})", source.length()); } return templates.newTransformer(); } - public void clearCache() { + // Package-private: cache lifetime is tied to the SchematronTransformer instance. + // LabelValidator.clear() creates a new instance (line 219), which naturally + // starts with an empty cache. Exposed for testing. + void clearCache() { cachedTemplates.clear(); } + + /** Returns the number of cached templates entries. Package-private for testing. */ + int cacheSize() { + return cachedTemplates.size(); + } + + private static final ThreadLocal SHA256 = ThreadLocal.withInitial(() -> { + try { + return MessageDigest.getInstance("SHA-256"); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + }); + + private static String sha256(String input) { + MessageDigest digest = SHA256.get(); + digest.reset(); + return HexFormat.of().formatHex(digest.digest(input.getBytes(StandardCharsets.UTF_8))); + } /** * Transform the given schematron. * diff --git a/src/test/java/gov/nasa/pds/tools/label/SchematronTransformerTest.java b/src/test/java/gov/nasa/pds/tools/label/SchematronTransformerTest.java new file mode 100644 index 000000000..cfe5a9f17 --- /dev/null +++ b/src/test/java/gov/nasa/pds/tools/label/SchematronTransformerTest.java @@ -0,0 +1,106 @@ +package gov.nasa.pds.tools.label; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNotSame; + +import javax.xml.transform.Transformer; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link SchematronTransformer} cache behavior. + * + *

These tests exercise the full ISO schematron → XSLT compilation pipeline and + * therefore require Saxon-HE on the classpath. They are slower than pure unit tests + * on first run due to XSLT compilation overhead. + */ +@Tag("integration") +public class SchematronTransformerTest { + + /** Minimal valid ISO Schematron document. */ + private static final String MINIMAL_SCHEMATRON = + "\n" + + "\n" + + " Minimal test schematron\n" + + " \n" + + " \n" + + " Always passes.\n" + + " \n" + + " \n" + + ""; + + /** A different schematron to verify distinct cache entries. */ + private static final String OTHER_SCHEMATRON = + "\n" + + "\n" + + " Other test schematron\n" + + " \n" + + " \n" + + " Also passes.\n" + + " \n" + + " \n" + + ""; + + private SchematronTransformer transformer; + + @BeforeEach + void setUp() throws Exception { + transformer = new SchematronTransformer(); + } + + @Test + void testCachePopulatedOnFirstCall() throws Exception { + assertEquals(0, transformer.cacheSize(), "cache should start empty"); + + Transformer t = transformer.transform(MINIMAL_SCHEMATRON); + assertNotNull(t); + assertEquals(1, transformer.cacheSize(), "cache should have one entry after first call"); + } + + @Test + void testCacheHitOnRepeatedCall() throws Exception { + Transformer t1 = transformer.transform(MINIMAL_SCHEMATRON); + assertEquals(1, transformer.cacheSize()); + + Transformer t2 = transformer.transform(MINIMAL_SCHEMATRON); + assertEquals(1, transformer.cacheSize(), "cache size should remain 1 after repeated call"); + + // Each call should return a distinct Transformer (Transformer is not thread-safe) + assertNotSame(t1, t2, "each call should produce a new Transformer instance"); + } + + @Test + void testDistinctSchematronsGetSeparateCacheEntries() throws Exception { + transformer.transform(MINIMAL_SCHEMATRON); + assertEquals(1, transformer.cacheSize()); + + transformer.transform(OTHER_SCHEMATRON); + assertEquals(2, transformer.cacheSize(), "distinct schematrons should produce separate cache entries"); + } + + @Test + void testClearCacheRemovesAllEntries() throws Exception { + transformer.transform(MINIMAL_SCHEMATRON); + transformer.transform(OTHER_SCHEMATRON); + assertEquals(2, transformer.cacheSize()); + + transformer.clearCache(); + assertEquals(0, transformer.cacheSize(), "cache should be empty after clearCache()"); + } + + @Test + void testCacheMissAfterClear() throws Exception { + transformer.transform(MINIMAL_SCHEMATRON); + assertEquals(1, transformer.cacheSize()); + + transformer.clearCache(); + assertEquals(0, transformer.cacheSize()); + + // Re-compile should repopulate the cache + Transformer t = transformer.transform(MINIMAL_SCHEMATRON); + assertNotNull(t); + assertEquals(1, transformer.cacheSize(), "cache should repopulate after clear"); + } +}