-
Notifications
You must be signed in to change notification settings - Fork 0
Fix InMemoryRegistrar performance: O(1) lookups, ConcurrentHashMap, cache clearing #2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
bd48fc0
2e1d7be
8f4c04a
937d380
431439e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,8 @@ | |
| import java.io.File; | ||
| import java.net.MalformedURLException; | ||
| import java.util.*; | ||
| import java.util.concurrent.ConcurrentHashMap; | ||
| import java.util.concurrent.atomic.AtomicInteger; | ||
| import java.util.function.Function; | ||
| import java.util.function.Predicate; | ||
| import java.util.stream.Collectors; | ||
|
|
@@ -28,14 +30,24 @@ public class InMemoryRegistrar implements TargetRegistrar { | |
|
|
||
| private static Logger LOG = LoggerFactory.getLogger(InMemoryRegistrar.class); | ||
| private ValidationTarget rootTarget; | ||
| private Map<String, ValidationTarget> targets = new HashMap<>(); | ||
| private Map<String, ValidationTarget> collections = new HashMap<>(); | ||
| private Map<String, ValidationTarget> bundles = new HashMap<>(); | ||
| private Set<String> referencedTargetLocations = new HashSet<>(); | ||
| private Map<Identifier, String> identifierDefinitions = new HashMap<>(); | ||
| private Map<String, Set<Identifier>> identifierDefinitionsByLid = new HashMap<>(); | ||
| private Map<Identifier, String> identifierReferenceLocations = new HashMap<>(); | ||
| private Map<String, Set<Identifier>> referencedIdentifiersByLid = new HashMap<>(); | ||
| private Map<String, ValidationTarget> targets = new ConcurrentHashMap<>(); | ||
| private Map<String, ValidationTarget> collections = new ConcurrentHashMap<>(); | ||
| private Map<String, ValidationTarget> bundles = new ConcurrentHashMap<>(); | ||
| private Set<String> referencedTargetLocations = ConcurrentHashMap.newKeySet(); | ||
| private Map<Identifier, String> identifierDefinitions = new ConcurrentHashMap<>(); | ||
| private Map<String, Set<Identifier>> identifierDefinitionsByLid = new ConcurrentHashMap<>(); | ||
| private Map<Identifier, String> identifierReferenceLocations = new ConcurrentHashMap<>(); | ||
| private Map<String, Set<Identifier>> referencedIdentifiersByLid = new ConcurrentHashMap<>(); | ||
|
|
||
| // Parent-child index: maps parent location to list of direct child locations. | ||
| // The inner ArrayLists are only accessed under the instance monitor (both addTarget() | ||
| // and getChildTargets() are synchronized), so they are thread-safe despite being mutable. | ||
| private Map<String, List<String>> childrenByParent = new ConcurrentHashMap<>(); | ||
|
|
||
| // Count-by-type index: tracks target counts per TargetType. | ||
| // Uses AtomicInteger to avoid boxed Integer allocations on each merge() call. | ||
| private Map<TargetType, AtomicInteger> targetCountByType = new ConcurrentHashMap<>(); | ||
| private AtomicInteger labelCount = new AtomicInteger(0); | ||
|
|
||
| @Override | ||
| public ValidationTarget getRoot() { | ||
|
|
@@ -57,7 +69,33 @@ public synchronized void addTarget(String parentLocation, TargetType type, Strin | |
| this.collections.put(location, target); | ||
| } | ||
|
|
||
| boolean isNew = !this.targets.containsKey(location); | ||
|
|
||
| // Only update indexes for genuinely new targets to avoid duplicates. | ||
| // Invariant: a location is never re-registered with a different TargetType. | ||
| // Both RegisterTargets.java call sites derive the type from the location itself | ||
| // (via Utility.getTargetType), so re-registration with a different type cannot | ||
| // happen in practice. Log a warning if this invariant is ever violated. | ||
| if (!isNew) { | ||
| ValidationTarget existing = this.targets.get(location); | ||
| if (existing != null && !type.equals(existing.getType())) { | ||
| LOG.warn("addTarget(): location {} re-registered with type {} (was {})", | ||
| location, type, existing.getType()); | ||
| } | ||
| } | ||
|
|
||
| this.targets.put(location, target); | ||
|
|
||
| if (isNew) { | ||
| // Index parent-child relationship for O(1) child lookups | ||
| if (parentLocation != null) { | ||
| childrenByParent.computeIfAbsent(parentLocation, k -> new ArrayList<>()).add(location); | ||
| } | ||
|
|
||
| // Increment count-by-type index | ||
| targetCountByType.computeIfAbsent(type, k -> new AtomicInteger(0)).incrementAndGet(); | ||
| } | ||
|
Comment on lines
69
to
+97
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 targetCountByType becomes stale when target is re-added with a different TargetType In While the primary caller Was this helpful? React with 👍 or 👎 to provide feedback. |
||
|
|
||
| LOG.debug("addTarget(): location: {}, target: {}", location, target); | ||
| } catch (MalformedURLException e) { | ||
| // TODO Auto-generated catch block | ||
|
|
@@ -67,40 +105,39 @@ public synchronized void addTarget(String parentLocation, TargetType type, Strin | |
|
|
||
| @Override | ||
| public synchronized Collection<ValidationTarget> getChildTargets(ValidationTarget parent) { | ||
| List<ValidationTarget> children = new ArrayList<>(); | ||
| String parentLocation = parent.getLocation() + File.separator; | ||
|
|
||
| for (String targetLocation : targets.keySet()) { | ||
| if (targetLocation.startsWith(parentLocation) | ||
| && !targetLocation.substring(parentLocation.length()).contains(File.separator)) { | ||
| children.add(targets.get(targetLocation)); | ||
| } | ||
| List<String> childLocations = childrenByParent.getOrDefault(parent.getLocation(), Collections.emptyList()); | ||
| List<ValidationTarget> children = new ArrayList<>(childLocations.size()); | ||
| for (String loc : childLocations) { | ||
| ValidationTarget t = targets.get(loc); | ||
| if (t != null) children.add(t); | ||
| } | ||
|
|
||
| Collections.sort(children); | ||
| return children; | ||
| } | ||
|
|
||
| @Override | ||
| public synchronized boolean hasTarget(String targetLocation) { | ||
| public boolean hasTarget(String targetLocation) { | ||
| return targets.containsKey(targetLocation); | ||
| } | ||
|
|
||
| @Override | ||
| public synchronized int getTargetCount(TargetType type) { | ||
| int count = 0; | ||
|
|
||
| for (Map.Entry<String, ValidationTarget> entry : targets.entrySet()) { | ||
| if (entry.getValue().getType() == type) { | ||
| ++count; | ||
| } | ||
| } | ||
| return count; | ||
| public int getTargetCount(TargetType type) { | ||
| AtomicInteger count = targetCountByType.get(type); | ||
| return count != null ? count.get() : 0; | ||
| } | ||
|
|
||
| @Override | ||
| public synchronized void setTargetIsLabel(String location, boolean isLabel) { | ||
| targets.get(location).setLabel(isLabel); | ||
| ValidationTarget target = targets.get(location); | ||
| boolean wasLabel = target.isLabel(); | ||
| target.setLabel(isLabel); | ||
|
|
||
| // Update label count index | ||
| if (isLabel && !wasLabel) { | ||
| labelCount.incrementAndGet(); | ||
| } else if (!isLabel && wasLabel) { | ||
| labelCount.decrementAndGet(); | ||
| } | ||
|
|
||
| // Labels refer to themselves. | ||
| if (isLabel) { | ||
|
|
@@ -109,15 +146,8 @@ public synchronized void setTargetIsLabel(String location, boolean isLabel) { | |
| } | ||
|
|
||
| @Override | ||
| public synchronized int getLabelCount() { | ||
| int count = 0; | ||
| for (Map.Entry<String, ValidationTarget> entry : targets.entrySet()) { | ||
| if (entry.getValue().isLabel()) { | ||
| ++count; | ||
| } | ||
| } | ||
|
|
||
| return count; | ||
| public int getLabelCount() { | ||
| return labelCount.get(); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -126,7 +156,7 @@ public synchronized void setTargetIdentifier(String location, Identifier identif | |
| LOG.debug("setTargetIdentifier:identifier,location {},{}", identifier, location); | ||
| identifierDefinitions.put(identifier, location); | ||
|
|
||
| identifierDefinitionsByLid.computeIfAbsent(identifier.getLid(), x -> new HashSet<>()).add(identifier); | ||
| identifierDefinitionsByLid.computeIfAbsent(identifier.getLid(), x -> ConcurrentHashMap.newKeySet()).add(identifier); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -135,7 +165,7 @@ public synchronized void addTargetReference(String referenceLocation, String tar | |
| } | ||
|
|
||
| @Override | ||
| public synchronized boolean isTargetReferenced(String location) { | ||
| public boolean isTargetReferenced(String location) { | ||
| return referencedTargetLocations.contains(location); | ||
| } | ||
|
|
||
|
|
@@ -145,11 +175,11 @@ public synchronized void addIdentifierReference(String referenceLocation, Identi | |
|
|
||
| String lid = identifier.getLid(); | ||
|
|
||
| referencedIdentifiersByLid.computeIfAbsent(identifier.getLid(), x -> new HashSet<>()).add(identifier); | ||
| referencedIdentifiersByLid.computeIfAbsent(identifier.getLid(), x -> ConcurrentHashMap.newKeySet()).add(identifier); | ||
| } | ||
|
|
||
| @Override | ||
| public synchronized boolean isIdentifierReferenced(Identifier identifier, boolean orNearNeighbor) { | ||
| public boolean isIdentifierReferenced(Identifier identifier, boolean orNearNeighbor) { | ||
| boolean result = identifierReferenceLocations.containsKey(identifier); | ||
| if (!result && orNearNeighbor) { | ||
| for (Identifier id : this.referencedIdentifiersByLid.getOrDefault(identifier.getLid(), Collections.emptySet())) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,150 @@ | ||
| package gov.nasa.pds.tools.validate; | ||
|
|
||
| import static org.junit.jupiter.api.Assertions.assertEquals; | ||
| import static org.junit.jupiter.api.Assertions.assertTrue; | ||
|
|
||
| import java.util.Collection; | ||
| import java.util.List; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| import org.junit.jupiter.api.BeforeEach; | ||
| import org.junit.jupiter.api.Test; | ||
|
|
||
| /** | ||
| * Unit tests for {@link InMemoryRegistrar}, focusing on the parent-child index | ||
| * and count-by-type caching introduced to eliminate O(n) scans. | ||
| */ | ||
| class InMemoryRegistrarTest { | ||
|
|
||
| private InMemoryRegistrar registrar; | ||
|
|
||
| // Use file:// URLs to match the format used in production | ||
| private static final String BUNDLE = "file:///data/bundle"; | ||
| private static final String COLLECTION_A = "file:///data/bundle/collection_a"; | ||
| private static final String COLLECTION_B = "file:///data/bundle/collection_b"; | ||
| private static final String PRODUCT_1 = "file:///data/bundle/collection_a/product1.xml"; | ||
| private static final String PRODUCT_2 = "file:///data/bundle/collection_a/product2.xml"; | ||
| private static final String PRODUCT_3 = "file:///data/bundle/collection_b/product3.xml"; | ||
|
|
||
| @BeforeEach | ||
| void setUp() { | ||
| registrar = new InMemoryRegistrar(); | ||
| ValidationTarget.clearCache(); | ||
| } | ||
|
|
||
| @Test | ||
| void testGetChildTargetsMultiLevelHierarchy() { | ||
| // Build a 3-level hierarchy: bundle -> collections -> products | ||
| registrar.addTarget(null, TargetType.BUNDLE, BUNDLE); | ||
| registrar.addTarget(BUNDLE, TargetType.COLLECTION, COLLECTION_A); | ||
| registrar.addTarget(BUNDLE, TargetType.COLLECTION, COLLECTION_B); | ||
| registrar.addTarget(COLLECTION_A, TargetType.FILE, PRODUCT_1); | ||
| registrar.addTarget(COLLECTION_A, TargetType.FILE, PRODUCT_2); | ||
| registrar.addTarget(COLLECTION_B, TargetType.FILE, PRODUCT_3); | ||
|
|
||
| // Bundle's children should be only the two collections (not grandchildren) | ||
| ValidationTarget bundleTarget = registrar.getRoot(); | ||
| Collection<ValidationTarget> bundleChildren = registrar.getChildTargets(bundleTarget); | ||
| List<String> bundleChildLocations = bundleChildren.stream() | ||
| .map(ValidationTarget::getLocation) | ||
| .collect(Collectors.toList()); | ||
|
|
||
| assertEquals(2, bundleChildLocations.size(), | ||
| "Bundle should have exactly 2 direct children (collections)"); | ||
| assertTrue(bundleChildLocations.contains(COLLECTION_A)); | ||
| assertTrue(bundleChildLocations.contains(COLLECTION_B)); | ||
|
|
||
| // Collection A's children should be product1 and product2 | ||
| ValidationTarget collATarget = registrar.getTargets().get(COLLECTION_A); | ||
| Collection<ValidationTarget> collAChildren = registrar.getChildTargets(collATarget); | ||
| List<String> collAChildLocations = collAChildren.stream() | ||
| .map(ValidationTarget::getLocation) | ||
| .collect(Collectors.toList()); | ||
|
|
||
| assertEquals(2, collAChildLocations.size(), | ||
| "Collection A should have exactly 2 direct children (products)"); | ||
| assertTrue(collAChildLocations.contains(PRODUCT_1)); | ||
| assertTrue(collAChildLocations.contains(PRODUCT_2)); | ||
|
|
||
| // Collection B's children should be only product3 | ||
| ValidationTarget collBTarget = registrar.getTargets().get(COLLECTION_B); | ||
| Collection<ValidationTarget> collBChildren = registrar.getChildTargets(collBTarget); | ||
| List<String> collBChildLocations = collBChildren.stream() | ||
| .map(ValidationTarget::getLocation) | ||
| .collect(Collectors.toList()); | ||
|
|
||
| assertEquals(1, collBChildLocations.size(), | ||
| "Collection B should have exactly 1 direct child"); | ||
| assertTrue(collBChildLocations.contains(PRODUCT_3)); | ||
| } | ||
|
|
||
| @Test | ||
| void testGetChildTargetsLeafNodeReturnsEmpty() { | ||
| registrar.addTarget(null, TargetType.BUNDLE, BUNDLE); | ||
| registrar.addTarget(BUNDLE, TargetType.FILE, PRODUCT_1); | ||
|
|
||
| // Leaf node should have no children | ||
| ValidationTarget product = registrar.getTargets().get(PRODUCT_1); | ||
| Collection<ValidationTarget> children = registrar.getChildTargets(product); | ||
| assertTrue(children.isEmpty(), "Leaf node should have no children"); | ||
| } | ||
|
|
||
| @Test | ||
| void testDuplicateAddTargetDoesNotCreateDuplicateChildren() { | ||
| registrar.addTarget(null, TargetType.BUNDLE, BUNDLE); | ||
| registrar.addTarget(BUNDLE, TargetType.FILE, PRODUCT_1); | ||
| // Add the same target again | ||
| registrar.addTarget(BUNDLE, TargetType.FILE, PRODUCT_1); | ||
|
|
||
| ValidationTarget bundleTarget = registrar.getRoot(); | ||
| Collection<ValidationTarget> children = registrar.getChildTargets(bundleTarget); | ||
| assertEquals(1, children.size(), | ||
| "Duplicate addTarget should not create duplicate children"); | ||
| } | ||
|
|
||
| @Test | ||
| void testTargetCountByType() { | ||
| registrar.addTarget(null, TargetType.BUNDLE, BUNDLE); | ||
| registrar.addTarget(BUNDLE, TargetType.COLLECTION, COLLECTION_A); | ||
| registrar.addTarget(BUNDLE, TargetType.COLLECTION, COLLECTION_B); | ||
| registrar.addTarget(COLLECTION_A, TargetType.FILE, PRODUCT_1); | ||
| registrar.addTarget(COLLECTION_A, TargetType.FILE, PRODUCT_2); | ||
| registrar.addTarget(COLLECTION_B, TargetType.FILE, PRODUCT_3); | ||
|
|
||
| assertEquals(1, registrar.getTargetCount(TargetType.BUNDLE)); | ||
| assertEquals(2, registrar.getTargetCount(TargetType.COLLECTION)); | ||
| assertEquals(3, registrar.getTargetCount(TargetType.FILE)); | ||
| assertEquals(0, registrar.getTargetCount(TargetType.DIRECTORY)); | ||
| } | ||
|
|
||
| @Test | ||
| void testDuplicateAddTargetDoesNotInflateCount() { | ||
| registrar.addTarget(null, TargetType.BUNDLE, BUNDLE); | ||
| registrar.addTarget(BUNDLE, TargetType.FILE, PRODUCT_1); | ||
| registrar.addTarget(BUNDLE, TargetType.FILE, PRODUCT_1); // duplicate | ||
|
|
||
| assertEquals(1, registrar.getTargetCount(TargetType.BUNDLE)); | ||
| assertEquals(1, registrar.getTargetCount(TargetType.FILE), | ||
| "Duplicate addTarget should not inflate count"); | ||
| } | ||
|
|
||
| @Test | ||
| void testLabelCount() { | ||
| registrar.addTarget(null, TargetType.BUNDLE, BUNDLE); | ||
| registrar.addTarget(BUNDLE, TargetType.FILE, PRODUCT_1); | ||
| registrar.addTarget(BUNDLE, TargetType.FILE, PRODUCT_2); | ||
|
|
||
| assertEquals(0, registrar.getLabelCount()); | ||
|
|
||
| registrar.setTargetIsLabel(PRODUCT_1, true); | ||
| assertEquals(1, registrar.getLabelCount()); | ||
|
|
||
| registrar.setTargetIsLabel(PRODUCT_2, true); | ||
| assertEquals(2, registrar.getLabelCount()); | ||
|
|
||
| // Setting the same target as label again should not double-count | ||
| registrar.setTargetIsLabel(PRODUCT_1, true); | ||
| assertEquals(2, registrar.getLabelCount(), | ||
| "Re-setting same label should not increase count"); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡
setTargets()bypasses new indexes, causing stalegetChildTargets(),getTargetCount(), andgetLabelCount()resultsThe PR changes
getChildTargets(),getTargetCount(), andgetLabelCount()to use precomputed indexes (childrenByParent,targetCountByType,labelCount) instead of scanning thetargetsmap. However, the existingsetTargets()method atInMemoryRegistrar.java:338replaces thetargetsmap without rebuilding these indexes. After asetTargets()call, the indexes become stale:getChildTargets()returns children from the old map,getTargetCount()returns old counts, andgetLabelCount()reflects old label state. Before this PR, all three methods scanned the livetargetsmap, sosetTargets()was inherently consistent. The same issue applies tosetBundles()andsetCollections(), though those only affect their own maps. While there are no current callers ofsetTargets()in the codebase, it is a public method on theTargetRegistrarinterface (TargetRegistrar.java:191).(Refers to lines 338-340)
Prompt for agents
Was this helpful? React with 👍 or 👎 to provide feedback.