Skip to content

Commit 4d932b5

Browse files
bjormel and claude committed
Skip suspended hosts when fetching deployment metrics
Adds NodeSuspensionProvider interface to config-provisioning and wires it into DeploymentMetricsRetriever so suspended nodes are excluded from metrics collection. ApplicationRepository injects the provider via DI. Co-Authored-By: Claude Sonnet 4.6 <[email protected]>
1 parent 6d78ac1 commit 4d932b5

4 files changed

Lines changed: 95 additions & 10 deletions

File tree

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
1+
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
2+
package com.yahoo.config.provision;
3+
4+
import java.util.Set;
5+
6+
/**
7+
* Provides the set of hostnames that are currently suspended for a given application.
8+
* Implemented in hosted Vespa using the node repository orchestrator.
9+
*
10+
* @author bjormel
11+
*/
12+
public interface NodeSuspensionProvider {
13+
14+
/** A provider that reports no suspended hosts: it returns an empty set for every application. */
NodeSuspensionProvider EMPTY = __ -> Set.of();
15+
16+
/**
 * Returns the hostnames of all nodes that are currently suspended for the given application.
 * Never null; returns an empty set if none are suspended.
 *
 * @param applicationId the application whose suspended hosts are requested
 * @return the suspended hostnames, possibly empty
 */
17+
Set<String> suspendedHosts(ApplicationId applicationId);
18+
19+
}

configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java

Lines changed: 26 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import com.yahoo.collections.Pair;
1010
import com.yahoo.component.Version;
1111
import com.yahoo.component.annotation.Inject;
12+
import com.yahoo.component.provider.ComponentRegistry;
1213
import com.yahoo.config.FileReference;
1314
import com.yahoo.config.application.api.ApplicationFile;
1415
import com.yahoo.config.application.api.ApplicationMetaData;
@@ -31,6 +32,7 @@
3132
import com.yahoo.config.provision.RegionName;
3233
import com.yahoo.config.provision.SystemName;
3334
import com.yahoo.config.provision.TenantName;
35+
import com.yahoo.config.provision.NodeSuspensionProvider;
3436
import com.yahoo.config.provision.Zone;
3537
import com.yahoo.config.provision.exception.ActivationConflictException;
3638
import com.yahoo.container.jdisc.HttpResponse;
@@ -77,6 +79,7 @@
7779
import com.yahoo.vespa.config.server.http.v2.PrepareResult;
7880
import com.yahoo.vespa.config.server.http.v2.response.DeploymentMetricsResponse;
7981
import com.yahoo.vespa.config.server.http.v2.response.SearchNodeMetricsResponse;
82+
import com.yahoo.vespa.config.server.metrics.ClusterDeploymentMetricsRetriever;
8083
import com.yahoo.vespa.config.server.metrics.DeploymentMetricsRetriever;
8184
import com.yahoo.vespa.config.server.metrics.SearchNodeMetricsRetriever;
8285
import com.yahoo.vespa.config.server.provision.HostProvisionerProvider;
@@ -165,6 +168,7 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye
165168
private final ClusterReindexingStatusClient clusterReindexingStatusClient;
166169
private final ActiveTokenFingerprints activeTokenFingerprints;
167170
private final FlagSource flagSource;
171+
private final DeploymentMetricsRetriever deploymentMetricsRetriever;
168172

169173
@Inject
170174
public ApplicationRepository(TenantRepository tenantRepository,
@@ -177,11 +181,12 @@ public ApplicationRepository(TenantRepository tenantRepository,
177181
TesterClient testerClient,
178182
HealthCheckerProvider healthCheckers,
179183
Metric metric,
180-
FlagSource flagSource) {
184+
FlagSource flagSource,
185+
ComponentRegistry<NodeSuspensionProvider> nodeSuspensionProviders) {
181186
this(tenantRepository,
182187
hostProvisionerProvider.getHostProvisioner(),
183188
infraDeployerProvider.getInfraDeployer(),
184-
configConvergenceChecker,
189+
configConvergenceChecker,
185190
configStateChecker,
186191
httpProxy,
187192
EndpointsChecker.of(healthCheckers.getHealthChecker()),
@@ -193,7 +198,20 @@ public ApplicationRepository(TenantRepository tenantRepository,
193198
new SecretStoreValidator(),
194199
new DefaultClusterReindexingStatusClient(),
195200
new ActiveTokenFingerprintsClient(),
196-
flagSource);
201+
flagSource,
202+
new DeploymentMetricsRetriever(new ClusterDeploymentMetricsRetriever(),
203+
nodeSuspensionProvider(nodeSuspensionProviders)));
204+
}
205+
206+
/**
 * Selects the NodeSuspensionProvider to use from the given component registry.
 *
 * @param registry the registry holding all registered NodeSuspensionProvider components
 * @return the single registered provider, or NodeSuspensionProvider.EMPTY when none is registered
 * @throws IllegalArgumentException if more than one provider is registered
 */
private static NodeSuspensionProvider nodeSuspensionProvider(ComponentRegistry<NodeSuspensionProvider> registry) {
207+
var providers = registry.allComponents();
208+
// More than one provider is ambiguous: fail and name the implementation classes in the message.
if (providers.size() > 1) {
209+
String providerDescriptions = providers.stream()
210+
.map(provider -> provider.getClass().getName())
211+
.collect(Collectors.joining(", "));
212+
throw new IllegalArgumentException("Multiple NodeSuspensionProvider components registered: " + providerDescriptions);
213+
}
214+
// No provider registered: fall back to the provider that reports nothing as suspended.
return providers.isEmpty() ? NodeSuspensionProvider.EMPTY : providers.get(0);
197215
}
198216

199217
private ApplicationRepository(TenantRepository tenantRepository,
@@ -211,7 +229,8 @@ private ApplicationRepository(TenantRepository tenantRepository,
211229
SecretStoreValidator secretStoreValidator,
212230
ClusterReindexingStatusClient clusterReindexingStatusClient,
213231
ActiveTokenFingerprints activeTokenFingerprints,
214-
FlagSource flagSource) {
232+
FlagSource flagSource,
233+
DeploymentMetricsRetriever deploymentMetricsRetriever) {
215234
this.tenantRepository = Objects.requireNonNull(tenantRepository);
216235
this.hostProvisioner = Objects.requireNonNull(hostProvisioner);
217236
this.infraDeployer = Objects.requireNonNull(infraDeployer);
@@ -228,6 +247,7 @@ private ApplicationRepository(TenantRepository tenantRepository,
228247
this.clusterReindexingStatusClient = Objects.requireNonNull(clusterReindexingStatusClient);
229248
this.activeTokenFingerprints = Objects.requireNonNull(activeTokenFingerprints);
230249
this.flagSource = flagSource;
250+
this.deploymentMetricsRetriever = Objects.requireNonNull(deploymentMetricsRetriever);
231251
}
232252

233253
// Should be used by tests only (first constructor in this class makes sure we use injectable components where possible)
@@ -327,7 +347,8 @@ public ApplicationRepository build() {
327347
secretStoreValidator,
328348
ClusterReindexingStatusClient.DUMMY_INSTANCE,
329349
__ -> activeTokens,
330-
flagSource);
350+
flagSource,
351+
new DeploymentMetricsRetriever());
331352
}
332353

333354
}
@@ -983,7 +1004,6 @@ public SearchNodeMetricsResponse getSearchNodeMetrics(ApplicationId applicationI
9831004

9841005
/**
 * Returns deployment metrics for the given application.
 *
 * @param applicationId id of the application to fetch metrics for
 * @return the response produced by the deployment metrics retriever for that application
 */
public DeploymentMetricsResponse getDeploymentMetrics(ApplicationId applicationId) {
9851006
Application application = getApplication(applicationId);
986-
DeploymentMetricsRetriever deploymentMetricsRetriever = new DeploymentMetricsRetriever();
9871007
// Delegates to the retriever; the application lookup above supplies its input.
return deploymentMetricsRetriever.getMetrics(application);
9881008
}
9891009

configserver/src/main/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsRetriever.java

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,12 @@
33

44
import com.yahoo.config.model.api.HostInfo;
55
import com.yahoo.config.model.api.ServiceInfo;
6+
import com.yahoo.config.provision.NodeSuspensionProvider;
67
import com.yahoo.vespa.config.server.application.Application;
78
import com.yahoo.vespa.config.server.http.v2.response.DeploymentMetricsResponse;
89
import java.net.URI;
910
import java.util.Collection;
11+
import java.util.Set;
1012
import java.util.function.Predicate;
1113

1214
/**
@@ -18,28 +20,36 @@
1820
public class DeploymentMetricsRetriever {
1921

2022
private final ClusterDeploymentMetricsRetriever metricsRetriever;
23+
private final NodeSuspensionProvider nodeSuspensionProvider;
2124

2225
/** Creates a retriever backed by a new ClusterDeploymentMetricsRetriever and NodeSuspensionProvider.EMPTY, so no hosts are treated as suspended. */
public DeploymentMetricsRetriever() {
23-
this(new ClusterDeploymentMetricsRetriever());
26+
this(new ClusterDeploymentMetricsRetriever(), NodeSuspensionProvider.EMPTY);
2427
}
2528

2629
/**
 * Creates a retriever that uses the given cluster metrics retriever and NodeSuspensionProvider.EMPTY,
 * so no hosts are treated as suspended.
 *
 * @param metricsRetriever the retriever used to request metrics from the application's hosts
 */
public DeploymentMetricsRetriever(ClusterDeploymentMetricsRetriever metricsRetriever) {
30+
this(metricsRetriever, NodeSuspensionProvider.EMPTY);
31+
}
32+
33+
/**
 * Creates a retriever that uses the given cluster metrics retriever and suspension provider.
 * Neither argument is null-checked here.
 *
 * @param metricsRetriever the retriever used to request metrics from the application's hosts
 * @param nodeSuspensionProvider supplies the hostnames to exclude from metrics collection
 */
public DeploymentMetricsRetriever(ClusterDeploymentMetricsRetriever metricsRetriever,
34+
NodeSuspensionProvider nodeSuspensionProvider) {
2735
this.metricsRetriever = metricsRetriever;
36+
this.nodeSuspensionProvider = nodeSuspensionProvider;
2837
}
2938

3039
/**
 * Fetches metrics for the given application, grouped by cluster.
 * Hosts reported as suspended by the NodeSuspensionProvider are excluded from the request.
 *
 * @param application the application to fetch metrics for
 * @return a response holding the application id and the metrics grouped by cluster
 */
public DeploymentMetricsResponse getMetrics(Application application) {
31-
var hosts = getHostsOfApplication(application);
40+
// Ask the provider which hostnames are currently suspended for this application.
var suspendedHostnames = nodeSuspensionProvider.suspendedHosts(application.getId());
41+
var hosts = getHostsOfApplication(application, suspendedHostnames);
3242
var clusterMetrics = metricsRetriever.requestMetricsGroupedByCluster(hosts);
3343
return new DeploymentMetricsResponse(application.getId(), clusterMetrics);
3444
}
3545

36-
private static Collection<URI> getHostsOfApplication(Application application) {
46+
/**
 * Builds the metrics proxy URIs for the hosts of the application's model that should be queried.
 * A host is skipped if any of its services matches isLogserver(), or if its hostname is in
 * suspendedHostnames.
 *
 * @param application the application whose model hosts are considered
 * @param suspendedHostnames hostnames to exclude from the result
 * @return one URI per remaining host, created by createMetricsProxyURI from the hostname
 */
private static Collection<URI> getHostsOfApplication(Application application, Set<String> suspendedHostnames) {
3747
return application.getModel().getHosts().stream()
3848
.filter(host -> host.getServices().stream().noneMatch(isLogserver()))
49+
.filter(host -> !suspendedHostnames.contains(host.getHostname()))
3950
.map(HostInfo::getHostname)
4051
.map(DeploymentMetricsRetriever::createMetricsProxyURI)
4152
.toList();
42-
4353
}
4454

4555
private static Predicate<ServiceInfo> isLogserver() {

configserver/src/test/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsRetrieverTest.java

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import com.yahoo.config.model.api.ServiceInfo;
99
import com.yahoo.config.provision.AllocatedHosts;
1010
import com.yahoo.config.provision.ApplicationId;
11+
import com.yahoo.config.provision.NodeSuspensionProvider;
1112
import com.yahoo.vespa.config.ConfigKey;
1213
import com.yahoo.vespa.config.buildergen.ConfigDefinition;
1314
import com.yahoo.vespa.config.server.application.Application;
@@ -22,6 +23,7 @@
2223
import java.util.Set;
2324

2425
import static org.junit.Assert.assertEquals;
26+
import static org.junit.Assert.assertTrue;
2527

2628
/**
2729
* @author olaa
@@ -41,6 +43,40 @@ public void getMetrics() {
4143
assertEquals(2, mockMetricsRetriever.hosts.size()); // Verify that logserver was ignored
4244
}
4345

46+
/**
 * Verifies that a host reported as suspended by the NodeSuspensionProvider is left out of the
 * hosts recorded by the mock metrics retriever, while a non-suspended host is still included.
 */
@Test
47+
public void suspendedHostsAreFiltered() {
48+
MockModel mockModel = new MockModel(mockHosts());
49+
MockDeploymentMetricsRetriever mockMetricsRetriever = new MockDeploymentMetricsRetriever();
50+
ApplicationId applicationId = ApplicationId.fromSerializedForm("tenant:app:instance");
51+
Application application = new Application(mockModel, null, 0,
52+
null, null, applicationId);
53+
54+
// Provider that reports host1 as suspended regardless of the application id.
NodeSuspensionProvider suspensionProvider = id -> Set.of("host1");
55+
DeploymentMetricsRetriever retriever = new DeploymentMetricsRetriever(mockMetricsRetriever, suspensionProvider);
56+
retriever.getMetrics(application);
57+
58+
assertEquals(1, mockMetricsRetriever.hosts.size()); // logserver (host3) and suspended host1 are ignored
59+
assertTrue(mockMetricsRetriever.hosts.stream().anyMatch(uri -> uri.getHost().equals("host2")));
60+
assertTrue(mockMetricsRetriever.hosts.stream().noneMatch(uri -> uri.getHost().equals("host1")));
61+
}
62+
63+
/**
 * Verifies that a NodeSuspensionProvider returning an empty set excludes no hosts beyond the
 * logserver: both host1 and host2 are among the hosts recorded by the mock metrics retriever.
 */
@Test
64+
public void noSuspendedHostsWhenProviderReturnsEmptySet() {
65+
MockModel mockModel = new MockModel(mockHosts());
66+
MockDeploymentMetricsRetriever mockMetricsRetriever = new MockDeploymentMetricsRetriever();
67+
ApplicationId applicationId = ApplicationId.fromSerializedForm("tenant:app:instance");
68+
Application application = new Application(mockModel, null, 0,
69+
null, null, applicationId);
70+
71+
// Provider that reports no suspended hosts for any application id.
NodeSuspensionProvider suspensionProvider = id -> Set.of();
72+
DeploymentMetricsRetriever retriever = new DeploymentMetricsRetriever(mockMetricsRetriever, suspensionProvider);
73+
retriever.getMetrics(application);
74+
// With an empty suspension set, behavior should be the same as without a provider: only logserver is ignored
75+
assertEquals(2, mockMetricsRetriever.hosts.size());
76+
assertTrue(mockMetricsRetriever.hosts.stream().anyMatch(uri -> uri.getHost().equals("host1")));
77+
assertTrue(mockMetricsRetriever.hosts.stream().anyMatch(uri -> uri.getHost().equals("host2")));
78+
}
79+
4480
private Collection<HostInfo> mockHosts() {
4581

4682
HostInfo hostInfo1 = new HostInfo("host1",

0 commit comments

Comments
 (0)