Skip to content
Merged
44 changes: 39 additions & 5 deletions src/main/java/gov/nasa/pds/tools/label/SchematronTransformer.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@
import java.io.StringWriter;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HexFormat;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.XMLConstants;
import javax.xml.transform.Source;
Expand All @@ -47,7 +50,7 @@
*/
public class SchematronTransformer {
private static final Logger LOG = LoggerFactory.getLogger(SchematronTransformer.class);
private final Map<String, Templates> cachedTemplates = new HashMap<>();
private final Map<String, Templates> cachedTemplates = new ConcurrentHashMap<>();

/**
* Constructor.
Expand Down Expand Up @@ -101,6 +104,9 @@ public Transformer transform(Source source) throws TransformerException {
*
* @throws TransformerException If an error occurred during the transform process.
*/
// Not cached — Source objects are not stably comparable/hashable.
// In practice LabelValidator only calls the String-based overload (line 622),
// so this uncached path does not affect bundle-validation performance.
public Transformer transform(Source source, ProblemHandler handler) throws TransformerException {
return compileSchematron(source, handler).newTransformer();
}
Expand All @@ -120,20 +126,48 @@ public Transformer transform(String source) throws TransformerException {
return this.transform(source, null);
}
public Transformer transform(String source, ProblemHandler handler) throws TransformerException {
Templates templates = cachedTemplates.get(source);
String key = sha256(source);
// Intentional check-then-act: a concurrent miss may compile twice, but
// both results are equivalent and the race is self-healing after warmup.
// computeIfAbsent is avoided because it holds a lock during compilation.
// handler is intentionally unused on cache hit — compilation errors
// would only occur during the first call (cache miss), not on reuse.
Templates templates = cachedTemplates.get(key);
if (templates == null) {
LOG.debug("transform: cache miss, compiling schematron (length={})", source.length());
templates = compileSchematron(new StreamSource(new StringReader(source)), handler);
cachedTemplates.put(source, templates);
cachedTemplates.put(key, templates);
} else {
LOG.debug("transform: cache hit, reusing compiled schematron (length={})", source.length());
}
return templates.newTransformer();
}

public void clearCache() {
// Package-private: cache lifetime is tied to the SchematronTransformer instance.
// LabelValidator.clear() creates a new instance (line 219), which naturally
// starts with an empty cache. Exposed for testing.
void clearCache() {
cachedTemplates.clear();
}

/** Returns the number of cached templates entries. Package-private for testing. */
int cacheSize() {
return cachedTemplates.size();
}

private static final ThreadLocal<MessageDigest> SHA256 = ThreadLocal.withInitial(() -> {
try {
return MessageDigest.getInstance("SHA-256");
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
}
});

private static String sha256(String input) {
MessageDigest digest = SHA256.get();
digest.reset();
return HexFormat.of().formatHex(digest.digest(input.getBytes(StandardCharsets.UTF_8)));
}
/**
* Transform the given schematron.
*
Expand Down
106 changes: 106 additions & 0 deletions src/test/java/gov/nasa/pds/tools/label/SchematronTransformerTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package gov.nasa.pds.tools.label;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNotSame;

import javax.xml.transform.Transformer;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

/**
* Tests for {@link SchematronTransformer} cache behavior.
*
* <p>These tests exercise the full ISO schematron → XSLT compilation pipeline and
* therefore require Saxon-HE on the classpath. They are slower than pure unit tests
* on first run due to XSLT compilation overhead.
*/
@Tag("integration")
public class SchematronTransformerTest {

/** Minimal valid ISO Schematron document. */
private static final String MINIMAL_SCHEMATRON =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+ "<sch:schema xmlns:sch=\"http://purl.oclc.org/dsdl/schematron\" queryBinding=\"xslt2\">\n"
+ " <sch:title>Minimal test schematron</sch:title>\n"
+ " <sch:pattern>\n"
+ " <sch:rule context=\"/root\">\n"
+ " <sch:assert test=\"true()\">Always passes.</sch:assert>\n"
+ " </sch:rule>\n"
+ " </sch:pattern>\n"
+ "</sch:schema>";

/** A different schematron to verify distinct cache entries. */
private static final String OTHER_SCHEMATRON =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+ "<sch:schema xmlns:sch=\"http://purl.oclc.org/dsdl/schematron\" queryBinding=\"xslt2\">\n"
+ " <sch:title>Other test schematron</sch:title>\n"
+ " <sch:pattern>\n"
+ " <sch:rule context=\"/other\">\n"
+ " <sch:assert test=\"true()\">Also passes.</sch:assert>\n"
+ " </sch:rule>\n"
+ " </sch:pattern>\n"
+ "</sch:schema>";

private SchematronTransformer transformer;

@BeforeEach
void setUp() throws Exception {
transformer = new SchematronTransformer();
}

@Test
void testCachePopulatedOnFirstCall() throws Exception {
assertEquals(0, transformer.cacheSize(), "cache should start empty");

Transformer t = transformer.transform(MINIMAL_SCHEMATRON);
assertNotNull(t);
assertEquals(1, transformer.cacheSize(), "cache should have one entry after first call");
}

@Test
void testCacheHitOnRepeatedCall() throws Exception {
Transformer t1 = transformer.transform(MINIMAL_SCHEMATRON);
assertEquals(1, transformer.cacheSize());

Transformer t2 = transformer.transform(MINIMAL_SCHEMATRON);
assertEquals(1, transformer.cacheSize(), "cache size should remain 1 after repeated call");

// Each call should return a distinct Transformer (Transformer is not thread-safe)
assertNotSame(t1, t2, "each call should produce a new Transformer instance");
}

@Test
void testDistinctSchematronsGetSeparateCacheEntries() throws Exception {
transformer.transform(MINIMAL_SCHEMATRON);
assertEquals(1, transformer.cacheSize());

transformer.transform(OTHER_SCHEMATRON);
assertEquals(2, transformer.cacheSize(), "distinct schematrons should produce separate cache entries");
}

@Test
void testClearCacheRemovesAllEntries() throws Exception {
transformer.transform(MINIMAL_SCHEMATRON);
transformer.transform(OTHER_SCHEMATRON);
assertEquals(2, transformer.cacheSize());

transformer.clearCache();
assertEquals(0, transformer.cacheSize(), "cache should be empty after clearCache()");
}

@Test
void testCacheMissAfterClear() throws Exception {
transformer.transform(MINIMAL_SCHEMATRON);
assertEquals(1, transformer.cacheSize());

transformer.clearCache();
assertEquals(0, transformer.cacheSize());

// Re-compile should repopulate the cache
Transformer t = transformer.transform(MINIMAL_SCHEMATRON);
assertNotNull(t);
assertEquals(1, transformer.cacheSize(), "cache should repopulate after clear");
}
}