Skip to content

Commit 691fb1c

Browse files
authored
[grid] Improve race conditions in Grid session distribution (#16939)
Signed-off-by: Viet Nguyen Duc <[email protected]>
1 parent aaeb4a5 commit 691fb1c

File tree

4 files changed

+59
-47
lines changed

4 files changed

+59
-47
lines changed

java/src/org/openqa/selenium/grid/distributor/local/LocalDistributor.java

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -441,27 +441,34 @@ private CreateSessionResponse startSession(
441441
}
442442

443443
private SlotId reserveSlot(RequestId requestId, Capabilities caps) {
444-
Lock writeLock = lock.writeLock();
445-
writeLock.lock();
444+
// Use read lock for slot selection to allow concurrent reads
445+
// This reduces contention compared to using write lock for the entire operation
446+
Set<SlotId> slotIds;
447+
Lock readLock = lock.readLock();
448+
readLock.lock();
446449
try {
447-
Set<SlotId> slotIds = slotSelector.selectSlot(caps, getAvailableNodes(), slotMatcher);
448-
if (slotIds.isEmpty()) {
449-
LOG.log(
450-
getDebugLogLevel(),
451-
String.format("No slots found for request %s and capabilities %s", requestId, caps));
452-
return null;
453-
}
454-
455-
for (SlotId slotId : slotIds) {
456-
if (reserve(slotId)) {
457-
return slotId;
458-
}
459-
}
450+
slotIds = slotSelector.selectSlot(caps, getAvailableNodes(), slotMatcher);
451+
} finally {
452+
readLock.unlock();
453+
}
460454

455+
if (slotIds.isEmpty()) {
456+
LOG.log(
457+
getDebugLogLevel(),
458+
String.format("No slots found for request %s and capabilities %s", requestId, caps));
461459
return null;
462-
} finally {
463-
writeLock.unlock();
464460
}
461+
462+
// Try to reserve each candidate slot
463+
// The reserve() method uses write lock briefly and atomic operations ensure thread safety
464+
// Multiple threads may select the same slot but only one will successfully reserve it
465+
for (SlotId slotId : slotIds) {
466+
if (reserve(slotId)) {
467+
return slotId;
468+
}
469+
}
470+
471+
return null;
465472
}
466473

467474
private boolean isNotSupported(Capabilities caps) {

java/src/org/openqa/selenium/grid/distributor/local/LocalGridModel.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -337,13 +337,16 @@ public boolean reserve(SlotId slotId) {
337337
}
338338

339339
Optional<Slot> maybeSlot =
340-
node.getSlots().stream().filter(slot -> slotId.equals(slot.getId())).findFirst();
340+
node.getSlots().stream()
341+
.filter(slot -> slotId.equals(slot.getId()))
342+
.filter(slot -> slot.getSession() == null)
343+
.findFirst();
341344

342345
if (maybeSlot.isEmpty()) {
343-
LOG.warning(
346+
LOG.fine(
344347
String.format(
345-
"Asked to reserve slot on node %s, but no slot with id %s found",
346-
node.getNodeId(), slotId));
348+
"Asked to reserve slot %s on node %s, but slot not found or already reserved",
349+
slotId, node.getNodeId()));
347350
return false;
348351
}
349352

java/src/org/openqa/selenium/grid/node/local/SessionSlot.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ public class SessionSlot
6262
private final boolean supportingCdp;
6363
private final boolean supportingBiDi;
6464
private final AtomicLong connectionCounter;
65-
private ActiveSession currentSession;
65+
// volatile ensures memory visibility across threads when session is set after reservation
66+
private volatile ActiveSession currentSession;
6667

6768
public SessionSlot(EventBus bus, Capabilities stereotype, SessionFactory factory) {
6869
this.bus = Require.nonNull("Event bus", bus);
@@ -131,16 +132,17 @@ public void stop(SessionClosedReason reason) {
131132
}
132133

133134
SessionId id = currentSession.getId();
135+
LOG.info(String.format("Stopping session %s (reason: %s)", id, reason));
134136
try {
135137
currentSession.stop();
138+
LOG.info(String.format("Session stopped successfully: %s", id));
136139
} catch (Exception e) {
137-
LOG.log(Level.WARNING, "Unable to cleanly close session", e);
140+
LOG.log(Level.WARNING, String.format("Unable to cleanly close session %s", id), e);
138141
}
139142
currentSession = null;
140143
connectionCounter.set(0);
141144
release();
142145
bus.fire(new SessionClosedEvent(id, reason));
143-
LOG.info(String.format("Stopping session %s (reason: %s)", id, reason));
144146
}
145147

146148
@Override

java/src/org/openqa/selenium/grid/router/HandleSession.java

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
import java.time.Duration;
3434
import java.time.Instant;
3535
import java.time.temporal.ChronoUnit;
36-
import java.util.Iterator;
3736
import java.util.concurrent.Callable;
3837
import java.util.concurrent.ConcurrentHashMap;
3938
import java.util.concurrent.ConcurrentMap;
@@ -118,28 +117,29 @@ public void close() {
118117
Runnable cleanUpHttpClients =
119118
() -> {
120119
Instant staleBefore = Instant.now().minus(2, ChronoUnit.MINUTES);
121-
Iterator<CacheEntry> iterator = httpClients.values().iterator();
122-
123-
while (iterator.hasNext()) {
124-
CacheEntry entry = iterator.next();
125-
126-
if (entry.inUse.get() != 0) {
127-
// the client is currently in use
128-
return;
129-
} else if (!entry.lastUse.isBefore(staleBefore)) {
130-
// the client was recently used
131-
return;
132-
} else {
133-
// the client has not been used for a while, remove it from the cache
134-
iterator.remove();
135-
136-
try {
137-
entry.httpClient.close();
138-
} catch (Exception ex) {
139-
LOG.log(Level.WARNING, "failed to close a stale httpclient", ex);
140-
}
141-
}
142-
}
120+
121+
// Use removeIf for safe and efficient removal from ConcurrentHashMap
122+
httpClients
123+
.entrySet()
124+
.removeIf(
125+
entry -> {
126+
CacheEntry cacheEntry = entry.getValue();
127+
if (cacheEntry.inUse.get() != 0) {
128+
// the client is currently in use
129+
return false;
130+
}
131+
if (!cacheEntry.lastUse.isBefore(staleBefore)) {
132+
// the client was recently used
133+
return false;
134+
}
135+
// the client has not been used for a while, close and remove it
136+
try {
137+
cacheEntry.httpClient.close();
138+
} catch (Exception ex) {
139+
LOG.log(Level.WARNING, "failed to close a stale httpclient", ex);
140+
}
141+
return true;
142+
});
143143
};
144144

145145
this.cleanUpHttpClientsCacheService =

0 commit comments

Comments
 (0)