Skip to content

Commit 914e2a5

Browse files
bjormel and claude committed
Skip suspended hosts when fetching deployment metrics
Adds the NodeSuspensionProvider interface to config-provisioning and wires it into DeploymentMetricsRetriever, so that suspended nodes are excluded from metrics collection. ApplicationRepository receives the provider through dependency injection.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 6d78ac1 commit 914e2a5

4 files changed

Lines changed: 74 additions & 10 deletions

File tree

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
2+
package com.yahoo.config.provision;
3+
4+
import java.util.Set;
5+
6+
/**
7+
* Provides the set of hostnames that are currently suspended for a given application.
8+
* Implemented in hosted Vespa using the node repository orchestrator.
9+
*
* <p>The interface has a single abstract method, so it can be implemented with a lambda.
* {@link #EMPTY} is the fallback used when no suspension information is available.</p>
*
10+
* @author bjormel
11+
*/
12+
public interface NodeSuspensionProvider {
13+
14+
/** A provider which reports an empty set of suspended hosts for every application. */
NodeSuspensionProvider EMPTY = __ -> Set.of();
15+
16+
/**
 * Returns the hostnames of all nodes that are currently suspended (have suspendedSinceMillis set) for the given application.
 *
 * @param applicationId the application whose suspended hosts are requested
 * @return the hostnames of the suspended nodes of that application
 */
17+
Set<String> suspendedHosts(ApplicationId applicationId);
18+
19+
}

configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import com.yahoo.collections.Pair;
1010
import com.yahoo.component.Version;
1111
import com.yahoo.component.annotation.Inject;
12+
import com.yahoo.component.provider.ComponentRegistry;
1213
import com.yahoo.config.FileReference;
1314
import com.yahoo.config.application.api.ApplicationFile;
1415
import com.yahoo.config.application.api.ApplicationMetaData;
@@ -77,6 +78,8 @@
7778
import com.yahoo.vespa.config.server.http.v2.PrepareResult;
7879
import com.yahoo.vespa.config.server.http.v2.response.DeploymentMetricsResponse;
7980
import com.yahoo.vespa.config.server.http.v2.response.SearchNodeMetricsResponse;
81+
import com.yahoo.vespa.config.server.metrics.ClusterDeploymentMetricsRetriever;
82+
import com.yahoo.config.provision.NodeSuspensionProvider;
8083
import com.yahoo.vespa.config.server.metrics.DeploymentMetricsRetriever;
8184
import com.yahoo.vespa.config.server.metrics.SearchNodeMetricsRetriever;
8285
import com.yahoo.vespa.config.server.provision.HostProvisionerProvider;
@@ -165,6 +168,7 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye
165168
private final ClusterReindexingStatusClient clusterReindexingStatusClient;
166169
private final ActiveTokenFingerprints activeTokenFingerprints;
167170
private final FlagSource flagSource;
171+
private final DeploymentMetricsRetriever deploymentMetricsRetriever;
168172

169173
@Inject
170174
public ApplicationRepository(TenantRepository tenantRepository,
@@ -177,11 +181,12 @@ public ApplicationRepository(TenantRepository tenantRepository,
177181
TesterClient testerClient,
178182
HealthCheckerProvider healthCheckers,
179183
Metric metric,
180-
FlagSource flagSource) {
184+
FlagSource flagSource,
185+
ComponentRegistry<NodeSuspensionProvider> nodeSuspensionProviders) {
181186
this(tenantRepository,
182187
hostProvisionerProvider.getHostProvisioner(),
183188
infraDeployerProvider.getInfraDeployer(),
184-
configConvergenceChecker,
189+
configConvergenceChecker,
185190
configStateChecker,
186191
httpProxy,
187192
EndpointsChecker.of(healthCheckers.getHealthChecker()),
@@ -193,7 +198,16 @@ public ApplicationRepository(TenantRepository tenantRepository,
193198
new SecretStoreValidator(),
194199
new DefaultClusterReindexingStatusClient(),
195200
new ActiveTokenFingerprintsClient(),
196-
flagSource);
201+
flagSource,
202+
new DeploymentMetricsRetriever(new ClusterDeploymentMetricsRetriever(),
203+
nodeSuspensionProvider(nodeSuspensionProviders)));
204+
}
205+
206+
/**
 * Selects the NodeSuspensionProvider to use from the injected component registry.
 *
 * @param registry the registry holding the NodeSuspensionProvider components
 * @return the single registered provider, or NodeSuspensionProvider.EMPTY when none is registered
 * @throws IllegalArgumentException if more than one provider is registered
 */
private static NodeSuspensionProvider nodeSuspensionProvider(ComponentRegistry<NodeSuspensionProvider> registry) {
207+
var providers = registry.allComponents();
208+
// More than one provider is ambiguous: fail rather than pick one arbitrarily.
if (providers.size() > 1)
209+
throw new IllegalArgumentException("Multiple NodeSuspensionProvider components registered: " + providers);
210+
// Zero providers: fall back to EMPTY. Exactly one: use it.
return providers.isEmpty() ? NodeSuspensionProvider.EMPTY : providers.get(0);
197211
}
198212

199213
private ApplicationRepository(TenantRepository tenantRepository,
@@ -211,7 +225,8 @@ private ApplicationRepository(TenantRepository tenantRepository,
211225
SecretStoreValidator secretStoreValidator,
212226
ClusterReindexingStatusClient clusterReindexingStatusClient,
213227
ActiveTokenFingerprints activeTokenFingerprints,
214-
FlagSource flagSource) {
228+
FlagSource flagSource,
229+
DeploymentMetricsRetriever deploymentMetricsRetriever) {
215230
this.tenantRepository = Objects.requireNonNull(tenantRepository);
216231
this.hostProvisioner = Objects.requireNonNull(hostProvisioner);
217232
this.infraDeployer = Objects.requireNonNull(infraDeployer);
@@ -228,6 +243,7 @@ private ApplicationRepository(TenantRepository tenantRepository,
228243
this.clusterReindexingStatusClient = Objects.requireNonNull(clusterReindexingStatusClient);
229244
this.activeTokenFingerprints = Objects.requireNonNull(activeTokenFingerprints);
230245
this.flagSource = flagSource;
246+
this.deploymentMetricsRetriever = Objects.requireNonNull(deploymentMetricsRetriever);
231247
}
232248

233249
// Should be used by tests only (first constructor in this class makes sure we use injectable components where possible)
@@ -327,7 +343,8 @@ public ApplicationRepository build() {
327343
secretStoreValidator,
328344
ClusterReindexingStatusClient.DUMMY_INSTANCE,
329345
__ -> activeTokens,
330-
flagSource);
346+
flagSource,
347+
new DeploymentMetricsRetriever());
331348
}
332349

333350
}
@@ -983,7 +1000,6 @@ public SearchNodeMetricsResponse getSearchNodeMetrics(ApplicationId applicationI
9831000

9841001
/**
 * Returns the deployment metrics of the given application.
 *
 * Looks up the application with getApplication and passes it to the
 * deploymentMetricsRetriever field assigned in the constructor.
 *
 * @param applicationId id of the application to fetch metrics for
 * @return the response produced by DeploymentMetricsRetriever.getMetrics
 */
public DeploymentMetricsResponse getDeploymentMetrics(ApplicationId applicationId) {
9851002
Application application = getApplication(applicationId);
// The line below is removed by this commit: a retriever is no longer created per call.
986-
DeploymentMetricsRetriever deploymentMetricsRetriever = new DeploymentMetricsRetriever();
9871003
return deploymentMetricsRetriever.getMetrics(application);
9881004
}
9891005

configserver/src/main/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsRetriever.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33

44
import com.yahoo.config.model.api.HostInfo;
55
import com.yahoo.config.model.api.ServiceInfo;
6+
import com.yahoo.config.provision.NodeSuspensionProvider;
67
import com.yahoo.vespa.config.server.application.Application;
78
import com.yahoo.vespa.config.server.http.v2.response.DeploymentMetricsResponse;
89
import java.net.URI;
910
import java.util.Collection;
11+
import java.util.Set;
1012
import java.util.function.Predicate;
1113

1214
/**
@@ -18,28 +20,36 @@
1820
public class DeploymentMetricsRetriever {
1921

2022
private final ClusterDeploymentMetricsRetriever metricsRetriever;
23+
private final NodeSuspensionProvider nodeSuspensionProvider;
2124

2225
/**
 * Creates a retriever backed by a new ClusterDeploymentMetricsRetriever.
 * After this change it also passes NodeSuspensionProvider.EMPTY, so no host is treated as suspended.
 */
public DeploymentMetricsRetriever() {
23-
this(new ClusterDeploymentMetricsRetriever());
26+
this(new ClusterDeploymentMetricsRetriever(), NodeSuspensionProvider.EMPTY);
2427
}
2528

2629
/**
 * Creates a retriever using the given cluster metrics retriever and
 * NodeSuspensionProvider.EMPTY, so no host is treated as suspended.
 *
 * @param metricsRetriever the retriever used to request metrics from the application's hosts
 */
public DeploymentMetricsRetriever(ClusterDeploymentMetricsRetriever metricsRetriever) {
30+
this(metricsRetriever, NodeSuspensionProvider.EMPTY);
31+
}
32+
33+
/**
 * Creates a retriever using the given cluster metrics retriever and suspension provider.
 *
 * @param metricsRetriever the retriever used to request metrics from the application's hosts
 * @param nodeSuspensionProvider supplies the hostnames to exclude from metrics collection; must not be null
 * @throws NullPointerException if nodeSuspensionProvider is null
 */
public DeploymentMetricsRetriever(ClusterDeploymentMetricsRetriever metricsRetriever,
34+
NodeSuspensionProvider nodeSuspensionProvider) {
2735
this.metricsRetriever = metricsRetriever;
36+
// Fail fast here instead of with a NullPointerException on the first getMetrics call.
this.nodeSuspensionProvider = java.util.Objects.requireNonNull(nodeSuspensionProvider, "nodeSuspensionProvider");
2837
}
2938

3039
/**
 * Requests metrics for the hosts of the given application, grouped by cluster.
 *
 * After this change the suspended hostnames are fetched from the
 * NodeSuspensionProvider and excluded from the hosts that are queried.
 *
 * @param application the application to collect metrics for
 * @return a response holding the application id and the per-cluster metrics
 */
public DeploymentMetricsResponse getMetrics(Application application) {
31-
var hosts = getHostsOfApplication(application);
40+
// Ask the provider which hosts are suspended for this application.
var suspendedHostnames = nodeSuspensionProvider.suspendedHosts(application.getId());
41+
var hosts = getHostsOfApplication(application, suspendedHostnames);
3242
var clusterMetrics = metricsRetriever.requestMetricsGroupedByCluster(hosts);
3343
return new DeploymentMetricsResponse(application.getId(), clusterMetrics);
3444
}
3545

36-
private static Collection<URI> getHostsOfApplication(Application application) {
46+
/**
 * Returns the URIs, built by createMetricsProxyURI, of the hosts to fetch metrics from.
 *
 * A host is left out if any of its services matches isLogserver(), or if its
 * hostname is contained in suspendedHostnames.
 *
 * @param application the application whose model supplies the hosts
 * @param suspendedHostnames hostnames to exclude from the result
 * @return one URI per remaining host
 */
private static Collection<URI> getHostsOfApplication(Application application, Set<String> suspendedHostnames) {
3747
return application.getModel().getHosts().stream()
3848
.filter(host -> host.getServices().stream().noneMatch(isLogserver()))
49+
.filter(host -> !suspendedHostnames.contains(host.getHostname()))
3950
.map(HostInfo::getHostname)
4051
.map(DeploymentMetricsRetriever::createMetricsProxyURI)
4152
.toList();
42-
4353
}
4454

4555
private static Predicate<ServiceInfo> isLogserver() {

configserver/src/test/java/com/yahoo/vespa/config/server/metrics/DeploymentMetricsRetrieverTest.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import com.yahoo.config.model.api.ServiceInfo;
99
import com.yahoo.config.provision.AllocatedHosts;
1010
import com.yahoo.config.provision.ApplicationId;
11+
import com.yahoo.config.provision.NodeSuspensionProvider;
1112
import com.yahoo.vespa.config.ConfigKey;
1213
import com.yahoo.vespa.config.buildergen.ConfigDefinition;
1314
import com.yahoo.vespa.config.server.application.Application;
@@ -22,6 +23,7 @@
2223
import java.util.Set;
2324

2425
import static org.junit.Assert.assertEquals;
26+
import static org.junit.Assert.assertTrue;
2527

2628
/**
2729
* @author olaa
@@ -41,6 +43,23 @@ public void getMetrics() {
4143
assertEquals(2, mockMetricsRetriever.hosts.size()); // Verify that logserver was ignored
4244
}
4345

46+
/**
 * Verifies that a hostname reported by the NodeSuspensionProvider ("host1") is
 * excluded from the hosts passed on to the cluster metrics retriever.
 */
@Test
47+
public void suspendedHostsAreFiltered() {
48+
MockModel mockModel = new MockModel(mockHosts());
49+
MockDeploymentMetricsRetriever mockMetricsRetriever = new MockDeploymentMetricsRetriever();
50+
ApplicationId applicationId = ApplicationId.fromSerializedForm("tenant:app:instance");
51+
Application application = new Application(mockModel, null, 0,
52+
null, null, applicationId);
53+
54+
// Provider that reports host1 as suspended, regardless of the application id.
NodeSuspensionProvider suspensionProvider = id -> Set.of("host1");
55+
DeploymentMetricsRetriever retriever = new DeploymentMetricsRetriever(mockMetricsRetriever, suspensionProvider);
56+
retriever.getMetrics(application);
57+
58+
assertEquals(1, mockMetricsRetriever.hosts.size()); // logserver (host3) and suspended host1 are ignored
59+
assertTrue(mockMetricsRetriever.hosts.stream().anyMatch(uri -> uri.getHost().equals("host2")));
60+
assertTrue(mockMetricsRetriever.hosts.stream().noneMatch(uri -> uri.getHost().equals("host1")));
61+
}
62+
4463
private Collection<HostInfo> mockHosts() {
4564

4665
HostInfo hostInfo1 = new HostInfo("host1",

0 commit comments

Comments
 (0)