diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f8ca3164..76588e0d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## [«unknown»](https://github.com/NASA-PDS/validate/tree/«unknown») (2026-03-25) +## [«unknown»](https://github.com/NASA-PDS/validate/tree/«unknown») (2026-04-01) [Full Changelog](https://github.com/NASA-PDS/validate/compare/v4.0.8...«unknown») @@ -12,6 +12,8 @@ **Defects:** +- Validate does not count cleanly-passing products, incorrectly triggering no\_products\_found error [\#1557](https://github.com/NASA-PDS/validate/issues/1557) [[s.medium](https://github.com/NASA-PDS/validate/labels/s.medium)] +- validate does not fail or report errors when an explicitly-specified target file does not exist [\#1548](https://github.com/NASA-PDS/validate/issues/1548) [[s.medium](https://github.com/NASA-PDS/validate/labels/s.medium)] - error.directory.unallowed\_name not raised on Windows for absolute directory\_path\_name [\#1539](https://github.com/NASA-PDS/validate/issues/1539) [[s.medium](https://github.com/NASA-PDS/validate/labels/s.medium)] - When validate reports a directory has a warning it says "XML could not be parsed." which is misleading. [\#1510](https://github.com/NASA-PDS/validate/issues/1510) [[s.medium](https://github.com/NASA-PDS/validate/labels/s.medium)] - Referential integrity not being checked in all places there is an Internal\_Reference area [\#1481](https://github.com/NASA-PDS/validate/issues/1481) [[s.medium](https://github.com/NASA-PDS/validate/labels/s.medium)] diff --git a/pom.xml b/pom.xml index 027c4cfbd..a3a6c67f6 100644 --- a/pom.xml +++ b/pom.xml @@ -432,7 +432,7 @@ software.amazon.awssdk apache-client - 2.42.18 + 2.42.23 diff --git a/src/main/java/gov/nasa/pds/tools/label/CachedLSResourceResolver.java b/src/main/java/gov/nasa/pds/tools/label/CachedLSResourceResolver.java index e79e11c15..c93d2fed6 100644 --- a/src/main/java/gov/nasa/pds/tools/label/CachedLSResourceResolver.java +++ b/src/main/java/gov/nasa/pds/tools/label/CachedLSResourceResolver.java @@ -20,8 +20,8 @@ import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; -import java.util.HashMap; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.io.IOUtils; import org.w3c.dom.ls.LSInput; import org.w3c.dom.ls.LSResourceResolver; @@ -135,10 +135,15 @@ public void setSystemId(String systemId) { } } - /** Hashmap to hold the entities. */ - private Map cachedEntities = new HashMap<>(); + /** Thread-safe cache of resolved entities keyed by systemId. */ + private Map cachedEntities = new ConcurrentHashMap<>(); - private ProblemHandler handler; + /** + * Per-thread problem handler so that concurrent callers do not overwrite + * each other's handler when this resolver is shared (e.g. as an + * {@link XMLCatalogResolver} on the singleton {@code LabelValidator}). + */ + private final ThreadLocal handlerRef = new ThreadLocal<>(); /** * Constructor. @@ -150,11 +155,11 @@ public CachedLSResourceResolver() { /** * Constructor. * - * @param container A container to hold messages. + * @param handler A handler to receive problems during resource resolution. */ public CachedLSResourceResolver(ProblemHandler handler) { - cachedEntities = new HashMap<>(); - this.handler = handler; + cachedEntities = new ConcurrentHashMap<>(); + this.handlerRef.set(handler); } @Override @@ -163,8 +168,8 @@ public LSInput resolveResource(String type, String namespaceURI, String publicId if (systemId == null) { return null; } - byte[] entity = cachedEntities.get(systemId); LSInputImpl input = new LSInputImpl(); + byte[] entity = cachedEntities.get(systemId); if (entity == null) { InputStream in = null; URLConnection conn = null; @@ -194,16 +199,17 @@ public LSInput resolveResource(String type, String namespaceURI, String publicId conn = url.openConnection(); in = Utility.openConnection(conn); entity = IOUtils.toByteArray(in); - cachedEntities.put(systemId, entity); + cachedEntities.putIfAbsent(systemId, entity); } catch (Exception e) { - if (handler != null) { + ProblemHandler h = handlerRef.get(); + if (h != null) { URL u = null; try { u = new URL(systemId); } catch (MalformedURLException mu) { u = url; } - handler.addProblem(new ValidationProblem(new ProblemDefinition(ExceptionType.FATAL, + h.addProblem(new ValidationProblem(new ProblemDefinition(ExceptionType.FATAL, ProblemType.LABEL_UNRESOLVABLE_RESOURCE, e.getMessage()), u)); } else { e.printStackTrace(); @@ -233,10 +239,10 @@ public void addCachedEntities(Map entities) { } public void setProblemHandler(ProblemHandler handler) { - this.handler = handler; + this.handlerRef.set(handler); } public ProblemHandler getProblemHandler() { - return this.handler; + return this.handlerRef.get(); } } diff --git a/src/main/java/gov/nasa/pds/tools/label/LabelValidator.java b/src/main/java/gov/nasa/pds/tools/label/LabelValidator.java index 941bb7803..ddb02b3ba 100644 --- a/src/main/java/gov/nasa/pds/tools/label/LabelValidator.java +++ b/src/main/java/gov/nasa/pds/tools/label/LabelValidator.java @@ -26,6 +26,9 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.DoubleAdder; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.XMLConstants; @@ -88,23 +91,106 @@ /** * This class is responsible for providing utility functions for validating PDS XML Labels. * + *

Thread-safety contract

+ *

{@code LabelValidator} is designed to be used as a singleton (see + * {@code ValidationResourceManager}) with concurrent validation from multiple + * threads. The following rules apply:

+ *
    + *
  • Configuration methods ({@link #setSchema}, {@link #setSchematrons}, + * {@link #setCatalogs}, {@link #setSchemaCheck}, {@link #setSchematronCheck}, + * {@link #setCachedLSResourceResolver}, {@link #setCachedEntityResolver}, + * {@link #addValidator}, {@link #setSkipProductValidation}, etc.) are + * setup-time only and must be called from a single thread before + * any concurrent validation begins. They are not synchronized.
  • + *
  • Validation methods ({@link #validate}, {@link #parseAndValidate}) + * are safe for concurrent use. Per-thread mutable parsing state is held + * in a {@link ThreadLocal}{@code }.
  • + *
  • {@link #clear()} may be called between validation runs to reset + * configuration. It increments a generation counter so that all threads + * will discard stale {@code ParserState} on their next validation call.
  • + *
+ * + *

Note on static helpers: {@link gov.nasa.pds.tools.util.LabelUtil} + * methods called from {@code parseAndValidate} ({@code setLocation}, + * {@code registerIMVersion}) are internally {@code synchronized} and therefore + * safe under concurrent validation.

+ * * @author pramirez * */ public class LabelValidator { private static final Logger LOG = LoggerFactory.getLogger(LabelValidator.class); + + /** + * Holds per-thread mutable parsing state so that each thread gets its own + * parser, validator handler, document builder, etc. This allows removing + * {@code synchronized} from the validation methods. + * + *

A {@code generation} field tracks whether the owning {@code LabelValidator} + * has been {@link #clear() cleared} since this state was created. If the + * generation is stale, {@link #parseAndValidate} discards and re-creates the + * state so that configuration changes made via {@code clear()} are picked up + * by every thread, not just the one that called {@code clear()}.

+ * + *

Note: the constructor wraps {@link ParserConfigurationException} in an + * unchecked {@link RuntimeException} because {@link ThreadLocal#withInitial} + * cannot propagate checked exceptions.

+ */ + private static class ParserState { + XMLReader cachedParser; + ValidatorHandler cachedValidatorHandler; + DocumentBuilder docBuilder; + List cachedSchematron = new ArrayList<>(); + CachedLSResourceResolver cachedLSResolver = new CachedLSResourceResolver(); + Schema validatingSchema; + SAXParserFactory saxParserFactory; + SchemaFactory schemaFactory; + long generation; + + ParserState(long generation) { + this.generation = generation; + try { + // Support for XSD 1.1 + schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); + + saxParserFactory = SAXParserFactory.newInstance(); + saxParserFactory.setNamespaceAware(true); + saxParserFactory.setXIncludeAware(Utility.supportXincludes()); + saxParserFactory.setValidating(false); + + try { + saxParserFactory.setFeature( + "http://apache.org/xml/features/xinclude/fixup-base-uris", false); + } catch (SAXNotRecognizedException | SAXNotSupportedException e) { + // should never happen, and can't recover + } + + docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + } catch (ParserConfigurationException e) { + throw new RuntimeException("Failed to initialise per-thread ParserState", e); + } + } + } + + /** + * Monotonically increasing generation counter, incremented by {@link #clear()}. + * Each {@link ParserState} records the generation it was created for; when a + * thread detects a mismatch it discards its stale state and creates a fresh one. + */ + private final AtomicLong configGeneration = new AtomicLong(0); + + private final ThreadLocal threadState = + ThreadLocal.withInitial(() -> new ParserState(configGeneration.get())); + private Map configurations = new HashMap<>(); private List userSchemaFiles; private List userSchematronFiles; private List userSchematronTransformers; - private XMLReader cachedParser; - private ValidatorHandler cachedValidatorHandler; - private List cachedSchematron; private XMLCatalogResolver resolver; private Boolean useLabelSchema; private Boolean useLabelSchematron; private Boolean skipProductValidation; - private Map cachedLabelSchematrons; + private volatile Map cachedLabelSchematrons; public static final String SCHEMA_CHECK = "gov.nasa.pds.tools.label.SchemaCheck"; public static final String SCHEMATRON_CHECK = "gov.nasa.pds.tools.label.SchematronCheck"; @@ -112,28 +198,24 @@ public class LabelValidator { private List externalValidators; private List documentValidators; private CachedEntityResolver cachedEntityResolver; - private CachedLSResourceResolver cachedLSResolver; - private SAXParserFactory saxParserFactory; - private DocumentBuilder docBuilder; - private SchemaFactory schemaFactory; - private Schema validatingSchema; + private volatile CachedLSResourceResolver sharedCachedLSResolver; private SchematronTransformer schematronTransformer; - private long filesProcessed = 0; - private double totalTimeElapsed = 0.0; + private final AtomicLong filesProcessed = new AtomicLong(0); + private final DoubleAdder totalTimeElapsed = new DoubleAdder(); /** * Returns the number of files processed by the validation function. */ public long getFilesProcessed() { - return (this.filesProcessed); + return this.filesProcessed.get(); } /** * Returns the duration it took to run the validation function. */ public double getTotalTimeElapsed() { - return (this.totalTimeElapsed); + return this.totalTimeElapsed.sum(); } /** @@ -173,9 +255,6 @@ public void clear() throws ParserConfigurationException, TransformerConfiguratio this.configurations.clear(); this.configurations.put(SCHEMA_CHECK, true); this.configurations.put(SCHEMATRON_CHECK, true); - cachedParser = null; - cachedValidatorHandler = null; - cachedSchematron = new ArrayList<>(); userSchemaFiles = new ArrayList<>(); userSchematronFiles = new ArrayList<>(); userSchematronTransformers = new ArrayList<>(); @@ -184,36 +263,14 @@ public void clear() throws ParserConfigurationException, TransformerConfiguratio documentValidators = new ArrayList<>(); useLabelSchema = false; useLabelSchematron = false; - cachedLabelSchematrons = new HashMap<>(); + skipProductValidation = false; + cachedLabelSchematrons = new ConcurrentHashMap<>(); cachedEntityResolver = new CachedEntityResolver(); - cachedLSResolver = new CachedLSResourceResolver(); - validatingSchema = null; - - // Support for XSD 1.1 - schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); - - // We need a SAX parser factory to do the validating parse - // to create the DOM tree, so we can insert line numbers - // as user data properties of the DOM nodes. - saxParserFactory = SAXParserFactory.newInstance(); - saxParserFactory.setNamespaceAware(true); - saxParserFactory.setXIncludeAware(Utility.supportXincludes()); - // The parser doesn't validate - we use a Validator instead. - saxParserFactory.setValidating(false); - - // Don't add xml:base attributes to xi:include content, or it messes up - // PDS4 validation. - try { - saxParserFactory.setFeature("http://apache.org/xml/features/xinclude/fixup-base-uris", false); - } catch (SAXNotRecognizedException e) { - // should never happen, and can't recover - } catch (SAXNotSupportedException e) { - // should never happen, and can't recover - } + sharedCachedLSResolver = null; - // We need a document builder to create new documents to insert - // parsed XML nodes. - docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + // Bump generation so all threads discard stale ParserState on next use + configGeneration.incrementAndGet(); + threadState.remove(); documentValidators.add(new DefaultDocumentValidator()); schematronTransformer = new SchematronTransformer(); @@ -225,7 +282,7 @@ public void clear() throws ParserConfigurationException, TransformerConfiguratio * @param schematronMap */ public void setLabelSchematrons(Map schematronMap) { - cachedLabelSchematrons = schematronMap; + cachedLabelSchematrons = new ConcurrentHashMap<>(schematronMap); } /** @@ -257,13 +314,14 @@ public XMLCatalogResolver getCatalogResolver() { return this.resolver; } - private List loadSchemaSources(List schemas) throws IOException, SAXException { + private List loadSchemaSources(ParserState ps, List schemas) + throws IOException, SAXException { List sources = new ArrayList<>(); LOG.debug("loadSchemaSources:schemas {}", schemas); String externalLocations = ""; for (URL schema : schemas) { LSInput input = - cachedLSResolver.resolveResource("", "", "", schema.toString(), schema.toString()); + ps.cachedLSResolver.resolveResource("", "", "", schema.toString(), schema.toString()); StreamSource streamSource = new StreamSource(input.getByteStream()); streamSource.setSystemId(schema.toString()); sources.add(streamSource); @@ -281,7 +339,8 @@ private List loadSchemaSources(List schemas) throws IOExcepti + schema.toString() + "': " + e.getMessage()); } } - schemaFactory.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation", + ps.schemaFactory.setProperty( + "http://apache.org/xml/properties/schema/external-schemaLocation", externalLocations); LOG.debug("loadSchemaSources:schemas,externalLocations,sources {}", schemas, externalLocations, sources); @@ -298,7 +357,7 @@ private List loadSchemaSources(String[] schemaFiles) { return sources; } - public synchronized void validate(ProblemHandler handler, File labelFile) throws SAXException, + public void validate(ProblemHandler handler, File labelFile) throws SAXException, IOException, ParserConfigurationException, TransformerException, MissingLabelSchemaException { validate(handler, labelFile.toURI().toURL()); } @@ -315,12 +374,12 @@ public synchronized void validate(ProblemHandler handler, File labelFile) throws * @throws TransformerException if there is an error during Schematron transformation * @throws MissingLabelSchemaException if the label schema cannot be found */ - public synchronized void validate(ProblemHandler handler, URL url) throws SAXException, + public void validate(ProblemHandler handler, URL url) throws SAXException, IOException, ParserConfigurationException, TransformerException, MissingLabelSchemaException { this.parseAndValidate(handler, url); } - public synchronized void validate(ProblemHandler handler, URL url, String labelExtension) + public void validate(ProblemHandler handler, URL url, String labelExtension) throws SAXException, IOException, ParserConfigurationException, TransformerException, MissingLabelSchemaException { this.parseAndValidate(handler, url); @@ -453,13 +512,20 @@ private void checkSchemaSchematronVersions(ProblemHandler handler, URL url) { * @throws TransformerException if there is an error during Schematron transformation * @throws MissingLabelSchemaException if the label schema cannot be found */ - public synchronized Document parseAndValidate(ProblemHandler handler, URL url) + public Document parseAndValidate(ProblemHandler handler, URL url) throws SAXException, IOException, ParserConfigurationException, TransformerException, MissingLabelSchemaException { EveryNCounter.getInstance().increment(Group.label_validation); List labelSchematronRefs = new ArrayList<>(); Document xml = null; + ParserState ps = threadState.get(); + // If clear() was called since this ParserState was created, discard and re-create + if (ps.generation != configGeneration.get()) { + threadState.remove(); + ps = threadState.get(); + } + // Printing debug is expensive. Should uncomment by developer only. long startTime = System.currentTimeMillis(); @@ -472,32 +538,32 @@ public synchronized Document parseAndValidate(ProblemHandler handler, URL url) // Are we performing schema validation? if (performsSchemaValidation()) { - createParserIfNeeded(handler); + createParserIfNeeded(ps, handler); checkSchemaSchematronVersions(handler, url); // Do we need this to clear the cache? if (useLabelSchema) { LOG.debug("parseAndValidate:#00AA0"); - cachedValidatorHandler = schemaFactory.newSchema().newValidatorHandler(); + ps.cachedValidatorHandler = ps.schemaFactory.newSchema().newValidatorHandler(); } else { LOG.debug("parseAndValidate:#00AA1"); - cachedValidatorHandler = validatingSchema.newValidatorHandler(); + ps.cachedValidatorHandler = ps.validatingSchema.newValidatorHandler(); } // Capture messages in a container if (handler != null) { LOG.debug("parseAndValidate:#00AA2"); ErrorHandler eh = new LabelErrorHandler(handler); - cachedParser.setErrorHandler(eh); - cachedValidatorHandler.setErrorHandler(eh); + ps.cachedParser.setErrorHandler(eh); + ps.cachedValidatorHandler.setErrorHandler(eh); } LOG.debug("parseAndValidate:#00AA3"); // Finally parse and validate the file - xml = docBuilder.newDocument(); - cachedParser.setContentHandler(new DocumentCreator(xml)); - cachedParser.parse(Utility.getInputSourceByURL(url)); + xml = ps.docBuilder.newDocument(); + ps.cachedParser.setContentHandler(new DocumentCreator(xml)); + ps.cachedParser.parse(Utility.getInputSourceByURL(url)); // Each version of the Information Model (IM) must be registered so in the end, // multiple versions can be reported. @@ -506,19 +572,19 @@ public synchronized Document parseAndValidate(ProblemHandler handler, URL url) LabelUtil.registerIMVersion(informationModelVersion); DOMLocator locator = new DOMLocator(url); - cachedValidatorHandler.setDocumentLocator(locator); + ps.cachedValidatorHandler.setDocumentLocator(locator); if (resolver != null) { LOG.debug("parseAndValidate:#00AA4"); - cachedValidatorHandler.setResourceResolver(resolver); + ps.cachedValidatorHandler.setResourceResolver(resolver); resolver.setProblemHandler(handler); } else { LOG.debug("parseAndValidate:#00AA5"); - cachedValidatorHandler.setResourceResolver(cachedLSResolver); + ps.cachedValidatorHandler.setResourceResolver(ps.cachedLSResolver); } if (!skipProductValidation) { LOG.debug("parseAndValidate:#00AA6"); - walkNode(xml, cachedValidatorHandler, locator); + walkNode(xml, ps.cachedValidatorHandler, locator); } LOG.debug("parseAndValidate:#00AA7"); @@ -533,8 +599,8 @@ public synchronized Document parseAndValidate(ProblemHandler handler, URL url) } } else { // No Schema validation will be performed. Just parse the label - XMLReader reader = saxParserFactory.newSAXParser().getXMLReader(); - xml = docBuilder.newDocument(); + XMLReader reader = ps.saxParserFactory.newSAXParser().getXMLReader(); + xml = ps.docBuilder.newDocument(); reader.setContentHandler(new DocumentCreator(xml)); // Capture messages in a container if (handler != null) { @@ -554,7 +620,7 @@ public synchronized Document parseAndValidate(ProblemHandler handler, URL url) // succeed. LOG.debug("parseAndValidate:0001:url,useLabelSchematron,cachedSchematron.size() {},{},{}", url, - useLabelSchematron, cachedSchematron.size()); + useLabelSchematron, ps.cachedSchematron.size()); // Validate with any schematron files we have if (performsSchematronValidation()) { // Look for schematron files specified in a label @@ -562,25 +628,25 @@ public synchronized Document parseAndValidate(ProblemHandler handler, URL url) labelSchematronRefs = getSchematrons(xml.getChildNodes(), url, handler); } LOG.debug("parseAndValidate:0002:url,useLabelSchematron,cachedSchematron.size() {},{},{}", - url, useLabelSchematron, cachedSchematron.size()); - if (cachedSchematron.isEmpty()) { + url, useLabelSchematron, ps.cachedSchematron.size()); + if (ps.cachedSchematron.isEmpty()) { if (useLabelSchematron) { - cachedSchematron = loadLabelSchematrons(labelSchematronRefs, url, handler); + ps.cachedSchematron = loadLabelSchematrons(labelSchematronRefs, url, handler); } else if (!userSchematronTransformers.isEmpty()) { - cachedSchematron = userSchematronTransformers; + ps.cachedSchematron = userSchematronTransformers; LOG.debug("parseAndValidate:0003:url,useLabelSchematron,cachedSchematron.size() {},{},{}", - url, useLabelSchematron, cachedSchematron.size()); + url, useLabelSchematron, ps.cachedSchematron.size()); } else if (!userSchematronFiles.isEmpty()) { List transformers = new ArrayList<>(); for (URL schematron : userSchematronFiles) { transformers.add(schematronTransformer.fetch(schematron, handler)); } - cachedSchematron = transformers; + ps.cachedSchematron = transformers; LOG.debug("parseAndValidate:0004:url,useLabelSchematron,cachedSchematron.size() {},{},{}", - url, useLabelSchematron, cachedSchematron.size()); + url, useLabelSchematron, ps.cachedSchematron.size()); } LOG.debug("parseAndValidate:0010:url,useLabelSchematron,cachedSchematron.size() {},{},{}", - url, useLabelSchematron, cachedSchematron.size()); + url, useLabelSchematron, ps.cachedSchematron.size()); } else { // If there are cached schematrons.... LOG.debug( @@ -588,16 +654,16 @@ public synchronized Document parseAndValidate(ProblemHandler handler, URL url) url, useLabelSchematron, userSchematronTransformers.isEmpty()); if (useLabelSchematron) { if (!userSchematronTransformers.isEmpty()) { - cachedSchematron = userSchematronTransformers; + ps.cachedSchematron = userSchematronTransformers; } else { - cachedSchematron = loadLabelSchematrons(labelSchematronRefs, url, handler); + ps.cachedSchematron = loadLabelSchematrons(labelSchematronRefs, url, handler); } } LOG.debug("parseAndValidate:0020:url,useLabelSchematron,cachedSchematron.size() {},{},{}", - url, useLabelSchematron, cachedSchematron.size()); + url, useLabelSchematron, ps.cachedSchematron.size()); } LOG.debug("parseAndValidate:0030:url,useLabelSchematron,cachedSchematron.size() {},{},{}", - url, useLabelSchematron, cachedSchematron.size()); + url, useLabelSchematron, ps.cachedSchematron.size()); // Determine if schematron validation should be done or not. // Note: schematron validation can be time consuming. Only the bundle or @@ -608,7 +674,7 @@ public synchronized Document parseAndValidate(ProblemHandler handler, URL url) LOG.debug("parseAndValidate:url,skipProductValidation,validateAgainstSchematronFlag {},{},{}", url, skipProductValidation, validateAgainstSchematronFlag); - for (String schematron : cachedSchematron) { + for (String schematron : ps.cachedSchematron) { try { long singleSchematronStartTime = System.currentTimeMillis(); if (!validateAgainstSchematronFlag) { @@ -664,83 +730,95 @@ public synchronized Document parseAndValidate(ProblemHandler handler, URL url) } } - this.filesProcessed += 1; + this.filesProcessed.incrementAndGet(); long finishTime = System.currentTimeMillis(); long timeElapsed = finishTime - startTime; - this.totalTimeElapsed += timeElapsed; + this.totalTimeElapsed.add(timeElapsed); LOG.debug( "parseAndValidate:url,skipProductValidation,this.filesProcessed,timeElapsed,this.totalTimeElapsed/1000.0 {},{},{},{},{}", - url, skipProductValidation, this.filesProcessed, timeElapsed, - this.totalTimeElapsed / 1000.0); + url, skipProductValidation, this.filesProcessed.get(), timeElapsed, + this.totalTimeElapsed.sum() / 1000.0); return xml; } - private void createParserIfNeeded(ProblemHandler handler) throws SAXNotRecognizedException, - SAXNotSupportedException, SAXException, IOException, ParserConfigurationException { + private void createParserIfNeeded(ParserState ps, ProblemHandler handler) + throws SAXNotRecognizedException, SAXNotSupportedException, SAXException, IOException, + ParserConfigurationException { // Do we have a schema we have loaded previously? - LOG.debug("createParserIfNeeded:cachedParser,resolver,handler {},{},{}", cachedParser, resolver, - handler); + LOG.debug("createParserIfNeeded:cachedParser,resolver,handler {},{},{}", ps.cachedParser, + resolver, handler); LOG.debug("createParserIfNeeded:#00BB0"); - if (cachedParser == null) { + if (ps.cachedParser == null) { LOG.debug("createParserIfNeeded:#00BB1"); // If catalog is used, allow resources to be loaded for schemas // and the document parser if (resolver != null) { LOG.debug("createParserIfNeeded:#00BB2"); - schemaFactory.setProperty("http://apache.org/xml/properties/internal/entity-resolver", - resolver); + ps.schemaFactory.setProperty( + "http://apache.org/xml/properties/internal/entity-resolver", resolver); } LOG.debug("createParserIfNeeded:#00BB3"); // Allow errors that happen in the schema to be logged there if (handler != null) { LOG.debug("createParserIfNeeded:#00BB4"); - schemaFactory.setErrorHandler(new LabelErrorHandler(handler)); - cachedLSResolver = new CachedLSResourceResolver(handler); - schemaFactory.setResourceResolver(cachedLSResolver); + ps.schemaFactory.setErrorHandler(new LabelErrorHandler(handler)); + // Use the shared resolver set via setCachedLSResourceResolver() if available, + // otherwise create a fresh per-thread resolver. + if (sharedCachedLSResolver != null) { + ps.cachedLSResolver = sharedCachedLSResolver; + } else { + ps.cachedLSResolver = new CachedLSResourceResolver(handler); + } + ps.cachedLSResolver.setProblemHandler(handler); + ps.schemaFactory.setResourceResolver(ps.cachedLSResolver); } else { LOG.debug("createParserIfNeeded:#00BB5"); - cachedLSResolver = new CachedLSResourceResolver(); - schemaFactory.setResourceResolver(cachedLSResolver); + if (sharedCachedLSResolver != null) { + ps.cachedLSResolver = sharedCachedLSResolver; + } else { + ps.cachedLSResolver = new CachedLSResourceResolver(); + } + ps.schemaFactory.setResourceResolver(ps.cachedLSResolver); } LOG.debug("createParserIfNeeded:#00BB6"); // Time to load schema that will be used for validation if (!userSchemaFiles.isEmpty()) { LOG.debug("createParserIfNeeded:#00BB7"); // User has specified schema files to use - validatingSchema = schemaFactory - .newSchema(loadSchemaSources(userSchemaFiles).toArray(new StreamSource[0])); + ps.validatingSchema = ps.schemaFactory + .newSchema(loadSchemaSources(ps, userSchemaFiles).toArray(new StreamSource[0])); } else if (resolver == null) { LOG.debug("createParserIfNeeded:#00BB8"); if (useLabelSchema || !VersionInfo.hasSchemaDir()) { LOG.debug("createParserIfNeeded:#00BB9"); - validatingSchema = schemaFactory.newSchema(); + ps.validatingSchema = ps.schemaFactory.newSchema(); } else { LOG.debug("createParserIfNeeded:#00BC0"); // Load from user specified external directory - validatingSchema = schemaFactory.newSchema( + ps.validatingSchema = ps.schemaFactory.newSchema( loadSchemaSources(VersionInfo.getSchemasFromDirectory().toArray(new String[0])) .toArray(new StreamSource[0])); } } else { LOG.debug("createParserIfNeeded:#00BC1"); // We're only going to use the catalog to validate against. - validatingSchema = schemaFactory.newSchema(); + ps.validatingSchema = ps.schemaFactory.newSchema(); } LOG.debug("createParserIfNeeded:#00BC2"); - cachedParser = saxParserFactory.newSAXParser().getXMLReader(); - cachedValidatorHandler = validatingSchema.newValidatorHandler(); + ps.cachedParser = ps.saxParserFactory.newSAXParser().getXMLReader(); + ps.cachedValidatorHandler = ps.validatingSchema.newValidatorHandler(); if (resolver != null) { LOG.debug("createParserIfNeeded:#00BC3"); - cachedParser.setEntityResolver(resolver); - docBuilder.setEntityResolver(resolver); + ps.cachedParser.setEntityResolver(resolver); + ps.docBuilder.setEntityResolver(resolver); } else if (useLabelSchema) { LOG.debug("createParserIfNeeded:#00BC4"); - cachedParser.setEntityResolver(cachedEntityResolver); + ps.cachedParser.setEntityResolver(cachedEntityResolver); } LOG.debug("createParserIfNeeded:#00BC5"); LOG.debug("createParserIfNeeded:cachedParser,cachedValidatorHandler,resolver {},{},{}", - cachedParser, cachedValidatorHandler, resolver); + ps.cachedParser, ps.cachedValidatorHandler, resolver); } else { // TODO: This code doesn't look right. It says that if we have // a cached parser, but we are using the label schema, then @@ -753,9 +831,9 @@ private void createParserIfNeeded(ProblemHandler handler) throws SAXNotRecognize LOG.debug("createParserIfNeeded:#00BC6"); if (useLabelSchema) { LOG.debug("createParserIfNeeded:#00BC7"); - cachedParser = saxParserFactory.newSAXParser().getXMLReader(); - cachedValidatorHandler = schemaFactory.newSchema().newValidatorHandler(); - cachedParser.setEntityResolver(cachedEntityResolver); + ps.cachedParser = ps.saxParserFactory.newSAXParser().getXMLReader(); + ps.cachedValidatorHandler = ps.schemaFactory.newSchema().newValidatorHandler(); + ps.cachedParser.setEntityResolver(cachedEntityResolver); } LOG.debug("createParserIfNeeded:#00BC8"); } @@ -896,20 +974,34 @@ private List loadLabelSchematrons(List schematronSources, URL ur + "' through the catalog: " + io.getMessage()); } } - if (!this.cachedLabelSchematrons.containsKey(source)) { - URL sourceUrl = new URL(source); - LOG.debug("loadLabelSchematrons:sourceUrl {}", sourceUrl); - document = schematronTransformer.fetch(sourceUrl); - cachedLabelSchematrons.put(source, document); - } - document = cachedLabelSchematrons.get(source); + final String schematronSource = source; + document = cachedLabelSchematrons.computeIfAbsent(schematronSource, key -> { + try { + URL sourceUrl = new URL(key); + LOG.debug("loadLabelSchematrons:sourceUrl {}", sourceUrl); + return schematronTransformer.fetch(sourceUrl); + } catch (RuntimeException re) { + throw re; + } catch (Exception e) { + throw new RuntimeException(e); + } + }); transformers.add(document); LOG.debug("loadLabelSchematrons:transformers.add:source {}", source); - } catch (TransformerException te) { - String message = "Schematron '" + source + "' error: " + te.getMessage(); - handler.addProblem(new ValidationProblem( - new ProblemDefinition(ExceptionType.ERROR, ProblemType.SCHEMATRON_ERROR, message), - url)); + } catch (RuntimeException re) { + // Unwrap checked exceptions wrapped by computeIfAbsent lambda + Throwable cause = re.getCause(); + if (cause instanceof TransformerException) { + String message = "Schematron '" + source + "' error: " + cause.getMessage(); + handler.addProblem(new ValidationProblem( + new ProblemDefinition(ExceptionType.ERROR, ProblemType.SCHEMATRON_ERROR, message), + url)); + } else { + String message = "Error occurred while loading schematron: " + (cause != null ? cause.getMessage() : re.getMessage()); + handler.addProblem(new ValidationProblem( + new ProblemDefinition(ExceptionType.ERROR, ProblemType.SCHEMATRON_ERROR, message), + url)); + } } catch (Exception e) { String message = "Error occurred while loading schematron: " + e.getMessage(); handler.addProblem(new ValidationProblem( @@ -1030,8 +1122,13 @@ public void setCachedEntityResolver(CachedEntityResolver resolver) { this.cachedEntityResolver = resolver; } + /** + * Sets a shared {@link CachedLSResourceResolver} that will be propagated to + * each thread's {@link ParserState} during {@link #createParserIfNeeded}. + * This is a setup-time method and must be called before concurrent validation. + */ public void setCachedLSResourceResolver(CachedLSResourceResolver resolver) { - this.cachedLSResolver = resolver; + this.sharedCachedLSResolver = resolver; } /** diff --git a/src/main/java/gov/nasa/pds/validate/ValidateLauncher.java b/src/main/java/gov/nasa/pds/validate/ValidateLauncher.java index 3cd2f716e..00b802f72 100644 --- a/src/main/java/gov/nasa/pds/validate/ValidateLauncher.java +++ b/src/main/java/gov/nasa/pds/validate/ValidateLauncher.java @@ -1504,6 +1504,25 @@ public boolean doValidation(Map checksumManifest) throws Exception } LOG.debug("ValidateLauncher:doValidation: validator.validate():target {}", target); + + // Check if explicitly-specified target file exists before running validation. + // If not, record an error directly so the product shows FAIL and the summary + // reflects the error (issue #1548). + File targetFile = FileUtils.toFile(target); + if (targetFile != null && !targetFile.exists()) { + ValidationProblem p = new ValidationProblem( + new ProblemDefinition(ExceptionType.ERROR, ProblemType.MISSING_REFERENCED_FILE, + "Target path does not exist: " + target.getPath()), + target); + try { + report.record(target.toURI(), p); + } catch (URISyntaxException e) { + // Should not happen + } + success = false; + continue; + } + validator.validate(monitor, target); monitor.endValidation(); diff --git a/src/main/java/gov/nasa/pds/validate/report/Report.java b/src/main/java/gov/nasa/pds/validate/report/Report.java index 0de43aff1..4f0460569 100644 --- a/src/main/java/gov/nasa/pds/validate/report/Report.java +++ b/src/main/java/gov/nasa/pds/validate/report/Report.java @@ -228,7 +228,7 @@ final public Status record(URI sourceUri, final List problems } } } else { - if (!Utility.isDir(sourceUri.toString()) && problems.size() - ignoreFromTotalCounts > 0) { + if (!Utility.isDir(sourceUri.toString())) { if (!this.integrityCheckFlag) { this.numPassedProds++; } else { diff --git a/src/test/java/gov/nasa/pds/tools/label/LabelValidatorConcurrencyTest.java b/src/test/java/gov/nasa/pds/tools/label/LabelValidatorConcurrencyTest.java new file mode 100644 index 000000000..81545a249 --- /dev/null +++ b/src/test/java/gov/nasa/pds/tools/label/LabelValidatorConcurrencyTest.java @@ -0,0 +1,291 @@ +package gov.nasa.pds.tools.label; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.w3c.dom.Document; + +import gov.nasa.pds.tools.validate.ProblemContainer; + +/** + * Concurrent integration test for {@link LabelValidator}. + * + *

Validates multiple labels from a thread pool concurrently to verify: + *

    + *
  • No {@code ConcurrentModificationException} from shared state
  • + *
  • Each thread receives a correct, non-null {@link Document}
  • + *
  • Problem handlers are not cross-contaminated between threads
  • + *
+ * + *

Uses local schemas from the github71 test resources to avoid network I/O. + */ +class LabelValidatorConcurrencyTest { + + /** Test data directory containing github71 labels and local schemas. */ + private static final String GITHUB71_DIR = "src/test/resources/github71"; + + private LabelValidator validator; + + @TempDir + File tempDir; + + @BeforeEach + void setUp() throws Exception { + String testPath = new File(System.getProperty("user.dir"), GITHUB71_DIR).getAbsolutePath(); + + // Build a catalog file that rewrites PDS4 schema URLs to local files + File catFile = new File(tempDir, "catalog.xml"); + String catText = "\n" + + "\n" + + " \n" + + " \n" + + ""; + try (BufferedWriter w = new BufferedWriter(new FileWriter(catFile))) { + w.write(catText); + } + + validator = new LabelValidator(); + validator.setCatalogs(new String[] {catFile.getAbsolutePath()}); + validator.setSchemaCheck(true); + validator.setSchematronCheck(false); + validator.setSkipProductValidation(true); + } + + /** + * Validates the same label from N threads concurrently and asserts that every + * thread receives a non-null Document and that no exceptions are thrown. + */ + @Test + void concurrentParseAndValidate_sameLabel() throws Exception { + int threadCount = 4; + int iterationsPerThread = 3; + + URL labelUrl = findTestLabel(); + if (labelUrl == null) { + System.err.println("Skipping concurrentParseAndValidate_sameLabel: no test label found"); + return; + } + + ExecutorService pool = Executors.newFixedThreadPool(threadCount); + CyclicBarrier barrier = new CyclicBarrier(threadCount); + List errors = Collections.synchronizedList(new ArrayList<>()); + List> futures = new ArrayList<>(); + + for (int t = 0; t < threadCount; t++) { + futures.add(pool.submit(() -> { + try { + barrier.await(30, TimeUnit.SECONDS); + for (int i = 0; i < iterationsPerThread; i++) { + ProblemContainer handler = new ProblemContainer(); + Document doc = validator.parseAndValidate(handler, labelUrl); + assertNotNull(doc, "Document should not be null"); + } + } catch (Throwable ex) { + errors.add(ex); + } + })); + } + + for (Future f : futures) { + f.get(120, TimeUnit.SECONDS); + } + pool.shutdown(); + assertTrue(pool.awaitTermination(60, TimeUnit.SECONDS), "Pool should terminate"); + + if (!errors.isEmpty()) { + StringBuilder sb = new StringBuilder("Concurrent validation produced errors:\n"); + for (Throwable err : errors) { + sb.append(" ").append(err.getClass().getName()) + .append(": ").append(err.getMessage()).append("\n"); + err.printStackTrace(); + } + fail(sb.toString()); + } + + long expectedFiles = (long) threadCount * iterationsPerThread; + assertEquals(expectedFiles, validator.getFilesProcessed(), + "filesProcessed should equal threadCount * iterationsPerThread"); + assertTrue(validator.getTotalTimeElapsed() > 0, "totalTimeElapsed should be positive"); + } + + /** + * Verifies that {@link LabelValidator#clear()} properly invalidates all + * threads' cached ParserState via the generation counter. + */ + @Test + void clearInvalidatesAllThreads() throws Exception { + URL labelUrl = findTestLabel(); + if (labelUrl == null) { + System.err.println("Skipping clearInvalidatesAllThreads: no test label found"); + return; + } + + int threadCount = 4; + ExecutorService pool = Executors.newFixedThreadPool(threadCount); + CyclicBarrier barrier = new CyclicBarrier(threadCount); + List errors = Collections.synchronizedList(new ArrayList<>()); + + // Phase 1: warm up ParserState on all threads + List> futures = new ArrayList<>(); + for (int t = 0; t < threadCount; t++) { + futures.add(pool.submit(() -> { + try { + barrier.await(30, TimeUnit.SECONDS); + ProblemContainer handler = new ProblemContainer(); + validator.parseAndValidate(handler, labelUrl); + } catch (Throwable ex) { + errors.add(ex); + } + })); + } + for (Future f : futures) { + f.get(120, TimeUnit.SECONDS); + } + + // Phase 2: clear() from the main thread, then reconfigure + String testPath = new File(System.getProperty("user.dir"), GITHUB71_DIR).getAbsolutePath(); + File catFile = new File(tempDir, "catalog2.xml"); + String catText = "\n" + + "\n" + + " \n" + + " \n" + + ""; + try (BufferedWriter w = new BufferedWriter(new FileWriter(catFile))) { + w.write(catText); + } + + validator.clear(); + validator.setCatalogs(new String[] {catFile.getAbsolutePath()}); + validator.setSchemaCheck(true); + validator.setSchematronCheck(false); + validator.setSkipProductValidation(true); + + // Phase 3: validate again - should pick up fresh state via generation counter + CyclicBarrier barrier2 = new CyclicBarrier(threadCount); + futures.clear(); + for (int t = 0; t < threadCount; t++) { + futures.add(pool.submit(() -> { + try { + barrier2.await(30, TimeUnit.SECONDS); + ProblemContainer handler = new ProblemContainer(); + Document doc = validator.parseAndValidate(handler, labelUrl); + assertNotNull(doc, "Document should not be null after clear()"); + } catch (Throwable ex) { + errors.add(ex); + } + })); + } + for (Future f : futures) { + f.get(120, TimeUnit.SECONDS); + } + + pool.shutdown(); + assertTrue(pool.awaitTermination(60, TimeUnit.SECONDS), "Pool should terminate"); + + if (!errors.isEmpty()) { + StringBuilder sb = new StringBuilder("Post-clear concurrent validation produced errors:\n"); + for (Throwable err : errors) { + sb.append(" ").append(err.getClass().getName()) + .append(": ").append(err.getMessage()).append("\n"); + err.printStackTrace(); + } + fail(sb.toString()); + } + } + + /** + * Validates with schematron checking enabled from multiple threads concurrently. + * This exercises the {@code loadLabelSchematrons} / {@code cachedLabelSchematrons} + * {@code ConcurrentHashMap} path and the shared {@code CachedLSResourceResolver} + * under concurrent access. + */ + @Test + void concurrentParseAndValidate_withSchematron() throws Exception { + // Enable label-schematron validation so loadLabelSchematrons is exercised + validator.setSchematronCheck(true, true); + + int threadCount = 4; + int iterationsPerThread = 2; + + URL labelUrl = findTestLabel(); + if (labelUrl == null) { + System.err.println( + "Skipping concurrentParseAndValidate_withSchematron: no test label found"); + return; + } + + ExecutorService pool = Executors.newFixedThreadPool(threadCount); + CyclicBarrier barrier = new CyclicBarrier(threadCount); + List errors = Collections.synchronizedList(new ArrayList<>()); + List> futures = new ArrayList<>(); + + for (int t = 0; t < threadCount; t++) { + futures.add(pool.submit(() -> { + try { + barrier.await(30, TimeUnit.SECONDS); + for (int i = 0; i < iterationsPerThread; i++) { + ProblemContainer handler = new ProblemContainer(); + Document doc = validator.parseAndValidate(handler, labelUrl); + assertNotNull(doc, "Document should not be null with schematron enabled"); + } + } catch (Throwable ex) { + errors.add(ex); + } + })); + } + + for (Future f : futures) { + f.get(120, TimeUnit.SECONDS); + } + pool.shutdown(); + assertTrue(pool.awaitTermination(60, TimeUnit.SECONDS), "Pool should terminate"); + + if (!errors.isEmpty()) { + StringBuilder sb = new StringBuilder( + "Concurrent schematron validation produced errors:\n"); + for (Throwable err : errors) { + sb.append(" ").append(err.getClass().getName()) + .append(": ").append(err.getMessage()).append("\n"); + err.printStackTrace(); + } + fail(sb.toString()); + } + } + + /** + * Locates a PDS4 test label from the github71 test resources. + */ + private URL findTestLabel() { + File f = new File(System.getProperty("user.dir"), GITHUB71_DIR + "/ELE_MOM.xml"); + if (f.exists()) { + try { + return f.toURI().toURL(); + } catch (Exception e) { + // fall through + } + } + return null; + } +} diff --git a/src/test/resources/features/4.1.x.feature b/src/test/resources/features/4.1.x.feature index 459dc4d27..7b8aaff56 100644 --- a/src/test/resources/features/4.1.x.feature +++ b/src/test/resources/features/4.1.x.feature @@ -9,3 +9,4 @@ Feature: 4.1.x | 956 | 1 | "github956" | "--skip-context-validation -t {datasrc}/" | | | 1458 | 1 | "github1458" | " -t {datasrc}/" | "summary:productValidation:passed=2,summary:productValidation:total=2,summary:totalWarnings=6,messageTypes:warning.label.context_ref_mismatch=5,messageTypes:warning.label.schematron=1" | | 1481 | 1 | "github1481" | "--skip-context-validation --skip-product-validation -R pds4.bundle -t {datasrc}/bundle_test.xml" | "summary:totalWarnings=1,summary:messageTypes:warning.integrity.reference_not_found=1" | +| 1548 | 1 | "github1548" | "--skip-context-validation -t {datasrc}/nonexistent.xml" | "summary:totalErrors=1,summary:productValidation:failed=1,summary:messageTypes:error.label.missing_file=1" | diff --git a/src/test/resources/github1548/.gitkeep b/src/test/resources/github1548/.gitkeep new file mode 100644 index 000000000..e69de29bb