Skip to content

Commit 340aaaf

Browse files
committed
HBASE-27574 Implement ClusterManager interface for Kubernetes
A basic implementation that supports taking destructive actions. Assume that services are running behind a resilient `Deployment` of some kind, and that the cluster will handle starting up replacement processes. Requires specification of a scoping namespace.
1 parent da26134 commit 340aaaf

2 files changed

Lines changed: 277 additions & 0 deletions

File tree

hbase-it/pom.xml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
<description>Integration and System tests for HBase</description>
3333

3434
<properties>
35+
<k8s.version>17.0.0</k8s.version>
3536
<!-- Test inclusion patterns used by failsafe configuration -->
3637
<unittest.include>**/Test*.java</unittest.include>
3738
<integrationtest.include>**/IntegrationTest*.java</integrationtest.include>
@@ -174,6 +175,24 @@
174175
<artifactId>javax.servlet-api</artifactId>
175176
<scope>test</scope>
176177
</dependency>
178+
<dependency>
179+
<groupId>io.kubernetes</groupId>
180+
<artifactId>client-java</artifactId>
181+
<version>${k8s.version}</version>
182+
<scope>test</scope>
183+
</dependency>
184+
<dependency>
185+
<groupId>io.kubernetes</groupId>
186+
<artifactId>client-java-api</artifactId>
187+
<version>${k8s.version}</version>
188+
<scope>test</scope>
189+
<exclusions>
190+
<exclusion>
191+
<groupId>com.google.code.findbugs</groupId>
192+
<artifactId>jsr305</artifactId>
193+
</exclusion>
194+
</exclusions>
195+
</dependency>
177196
<dependency>
178197
<groupId>junit</groupId>
179198
<artifactId>junit</artifactId>
Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase;
19+
20+
import com.google.gson.JsonSyntaxException;
21+
import io.kubernetes.client.openapi.ApiClient;
22+
import io.kubernetes.client.openapi.ApiException;
23+
import io.kubernetes.client.openapi.apis.CoreV1Api;
24+
import io.kubernetes.client.openapi.models.V1ObjectMeta;
25+
import io.kubernetes.client.openapi.models.V1Pod;
26+
import io.kubernetes.client.openapi.models.V1PodList;
27+
import io.kubernetes.client.util.ClientBuilder;
28+
import java.io.IOException;
29+
import java.util.Collections;
30+
import java.util.HashSet;
31+
import java.util.Optional;
32+
import java.util.Set;
33+
import org.apache.hadoop.conf.Configuration;
34+
import org.apache.hadoop.conf.Configured;
35+
import org.apache.hadoop.hbase.procedure2.util.StringUtils;
36+
import org.apache.yetus.audience.InterfaceAudience;
37+
import org.slf4j.Logger;
38+
import org.slf4j.LoggerFactory;
39+
40+
/**
41+
* Cluster manager for K8s context. Supports taking destructive actions, and checking for the
42+
* presence of a running process. Assumes services are running behind some type of `Deployment` that
43+
* will handle starting replacement processes after a destructive action. Requires that the
44+
* configuration specify a Kubernetes namespace.
45+
* </p>
46+
* Note that the k8s java client is a bit dodgy unable to read responses when we call delete (i.e.
47+
* kill) and sometimes when checking isRunning; makes for noisy logs and sometimes the operations
48+
* overrun each other but generally succeed.
49+
*/
50+
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
51+
public class KubernetesClusterManager extends Configured implements ClusterManager {
52+
private static final Logger LOG = LoggerFactory.getLogger(KubernetesClusterManager.class);
53+
54+
private static final String NAMESPACE_CONF_KEY =
55+
KubernetesClusterManager.class.getCanonicalName() + ".namespace";
56+
private static final String ZOOKEEPER_ROLE_SELECTOR_CONF_KEY =
57+
KubernetesClusterManager.class.getCanonicalName() + ".zookeeper_role";
58+
private static final String NAMENODE_ROLE_SELECTOR_CONF_KEY =
59+
KubernetesClusterManager.class.getCanonicalName() + ".namenode_role";
60+
private static final String JOURNALNODE_ROLE_SELECTOR_CONF_KEY =
61+
KubernetesClusterManager.class.getCanonicalName() + ".journalnode_role";
62+
private static final String DATANODE_ROLE_SELECTOR_CONF_KEY =
63+
KubernetesClusterManager.class.getCanonicalName() + ".datanode_role";
64+
private static final String MASTER_ROLE_SELECTOR_CONF_KEY =
65+
KubernetesClusterManager.class.getCanonicalName() + ".master_role";
66+
private static final String REGIONSERVER_ROLE_SELECTOR_CONF_KEY =
67+
KubernetesClusterManager.class.getCanonicalName() + ".regionserver_role";
68+
private static final Set<ServiceType> SUPPORTED_SERVICE_TYPES = buildSupportedServiceTypesSet();
69+
70+
private CoreV1Api api;
71+
private String namespace;
72+
73+
private static Set<ServiceType> buildSupportedServiceTypesSet() {
74+
final Set<ServiceType> set = new HashSet<>();
75+
set.add(ServiceType.ZOOKEEPER_SERVER);
76+
set.add(ServiceType.HADOOP_NAMENODE);
77+
set.add(ServiceType.HADOOP_JOURNALNODE);
78+
set.add(ServiceType.HADOOP_DATANODE);
79+
set.add(ServiceType.HBASE_MASTER);
80+
set.add(ServiceType.HBASE_REGIONSERVER);
81+
return Collections.unmodifiableSet(set);
82+
}
83+
84+
@Override
85+
public void setConf(Configuration configuration) {
86+
// This is usually called from the constructor (The call to super goes to
87+
// Configured which does a setConf in its construction) but the configuration
88+
// null at that time. After construction, setConf is called again.
89+
// Assume single-thread doing ClusterManager setup.
90+
if (configuration == null) {
91+
LOG.debug("Skipping because provided configuration=null");
92+
return;
93+
}
94+
if (getConf() != null) {
95+
LOG.debug("Skipping because configuration already set, getConf={}", getConf());
96+
return;
97+
}
98+
super.setConf(configuration);
99+
this.namespace = configuration.get(NAMESPACE_CONF_KEY);
100+
LOG.info(
101+
"Configuration={}, namespace={}, hbase.rootdir={}, hbase.zookeeper.quorum={}, "
102+
+ "hbase.client.zookeeper.quorum={}",
103+
configuration, this.namespace, configuration.get("hbase.rootdir"),
104+
configuration.get("hbase.zookeeper.quorum"),
105+
configuration.get("hbase.client.zookeeper.quorum"));
106+
final CoreV1Api coreV1Api;
107+
try {
108+
ApiClient client = ClientBuilder.cluster().build();
109+
io.kubernetes.client.openapi.Configuration.setDefaultApiClient(client);
110+
coreV1Api = new CoreV1Api();
111+
} catch (IOException ioe) {
112+
throw new RuntimeException("Failed Kubernetes ApiClient construction", ioe);
113+
}
114+
this.api = coreV1Api;
115+
}
116+
117+
@Override
118+
public void start(ServiceType service, String hostname, int port) {
119+
assertSupportedServiceType(service);
120+
// presumably kubernetes is automatically starting pods, so nothing to do here.
121+
}
122+
123+
@Override
124+
public void stop(ServiceType service, String hostname, int port) throws IOException {
125+
assertSupportedServiceType(service);
126+
kill(hostname);
127+
}
128+
129+
@Override
130+
public void restart(ServiceType service, String hostname, int port) throws IOException {
131+
assertSupportedServiceType(service);
132+
kill(hostname);
133+
}
134+
135+
@Override
136+
public void kill(ServiceType service, String hostname, int port) throws IOException {
137+
assertSupportedServiceType(service);
138+
kill(hostname);
139+
}
140+
141+
@Override
142+
public void suspend(ServiceType service, String hostname, int port) {
143+
throw unsupportedActionType("suspend");
144+
}
145+
146+
@Override
147+
public void resume(ServiceType service, String hostname, int port) {
148+
throw unsupportedActionType("resume");
149+
}
150+
151+
@Override
152+
public boolean isRunning(ServiceType service, String hostname, int port) throws IOException {
153+
final String roleSelector = roleSelectorForServiceType(service);
154+
return isRunning(roleSelector, hostname);
155+
}
156+
157+
private String roleSelectorForServiceType(final ServiceType serviceType) {
158+
assertSupportedServiceType(serviceType);
159+
switch (serviceType) {
160+
case HADOOP_NAMENODE:
161+
return getConf().get(NAMENODE_ROLE_SELECTOR_CONF_KEY, "namenode");
162+
case HADOOP_DATANODE:
163+
return getConf().get(DATANODE_ROLE_SELECTOR_CONF_KEY, "datanode");
164+
case HADOOP_JOURNALNODE:
165+
return getConf().get(JOURNALNODE_ROLE_SELECTOR_CONF_KEY, "journalnode");
166+
case ZOOKEEPER_SERVER:
167+
return getConf().get(ZOOKEEPER_ROLE_SELECTOR_CONF_KEY, "zookeeper");
168+
case HBASE_MASTER:
169+
return getConf().get(MASTER_ROLE_SELECTOR_CONF_KEY, "master");
170+
case HBASE_REGIONSERVER:
171+
return getConf().get(REGIONSERVER_ROLE_SELECTOR_CONF_KEY, "regionserver");
172+
default:
173+
throw new RuntimeException("should not happen");
174+
}
175+
}
176+
177+
private boolean isRunning(String roleSelector, String hostname) throws IOException {
178+
if (api == null) {
179+
return false;
180+
}
181+
final V1PodList list;
182+
try {
183+
list = api.listNamespacedPod(namespace, null, null, null, null,
184+
"role in (" + roleSelector + ")", null, null, null, null, null);
185+
} catch (ApiException e) {
186+
throw convertApiException(e);
187+
}
188+
if (list == null) {
189+
return false;
190+
}
191+
for (V1Pod item : list.getItems()) {
192+
final String podName =
193+
Optional.ofNullable(item).map(V1Pod::getMetadata).map(V1ObjectMeta::getName).orElse(null);
194+
if (StringUtils.isEmpty(podName)) {
195+
LOG.warn("Listing of namespace '{}' contains entry with empty pod name.", namespace);
196+
continue;
197+
}
198+
if (hostname.startsWith(podName)) {
199+
return true;
200+
}
201+
}
202+
return false;
203+
}
204+
205+
/**
206+
* @param hostname fully qualified hostname of the target pod.
207+
* @return The pod name without the domain.
208+
*/
209+
private static String hostNameToPodName(String hostname) {
210+
return hostname.substring(0, hostname.indexOf("."));
211+
}
212+
213+
private void kill(String hostname) throws IOException {
214+
try {
215+
final String podName = hostNameToPodName(hostname);
216+
LOG.debug("Deleting pod: {}.{}", namespace, podName);
217+
api.deleteNamespacedPod(podName, namespace, null, null, null, null, null, null);
218+
} catch (JsonSyntaxException e) {
219+
handleJsonSyntaxException(e);
220+
} catch (ApiException e) {
221+
throw convertApiException(e);
222+
}
223+
}
224+
225+
/**
226+
* See <a href="https://github.com/kubernetes-client/java/issues/86">kubernetes-client/java#86</a>
227+
*/
228+
private void handleJsonSyntaxException(JsonSyntaxException e) throws JsonSyntaxException {
229+
if (!(e.getCause() instanceof IllegalStateException)) {
230+
throw e;
231+
}
232+
final IllegalStateException ise = (IllegalStateException) e.getCause();
233+
if (
234+
ise.getMessage() != null && ise.getMessage().contains("BEGIN_OBJECT")
235+
&& ise.getMessage().contains("Expected")
236+
) {
237+
LOG.info("Operation probably succeeded but parse of result failed, "
238+
+ "see https://github.com/kubernetes-client/java/issues/86");
239+
} else {
240+
throw e;
241+
}
242+
}
243+
244+
private static IOException convertApiException(final ApiException e) {
245+
return new IOException(
246+
"response body: " + e.getResponseBody() + ", response code: " + e.getCode(), e);
247+
}
248+
249+
private static RuntimeException unsupportedActionType(final String action) {
250+
return new RuntimeException("Unable to service request for action=" + action);
251+
}
252+
253+
private static void assertSupportedServiceType(final ServiceType serviceType) {
254+
if (!SUPPORTED_SERVICE_TYPES.contains(serviceType)) {
255+
throw new RuntimeException("Unsupported ServiceType " + serviceType);
256+
}
257+
}
258+
}

0 commit comments

Comments
 (0)