Skip to content

Commit 43e421a

Browse files
HDDS-918. Expose SCMMXBean as a MetricsSource. Contributed by Siddharth Wagle.
1 parent 771ea6b commit 43e421a

6 files changed

Lines changed: 237 additions & 27 deletions

File tree

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,14 @@ public interface ContainerManager extends Closeable {
5656
*/
5757
List<ContainerInfo> getContainers(HddsProtos.LifeCycleState state);
5858

59+
/**
60+
* Returns the number of containers in the given
61+
* {@link org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState}.
62+
*
63+
* @return Number of containers
64+
*/
65+
Integer getContainerCountByState(HddsProtos.LifeCycleState state);
66+
5967
/**
6068
* Returns the ContainerInfo from the container ID.
6169
*

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -17,41 +17,41 @@
1717

1818
package org.apache.hadoop.hdds.scm.container;
1919

20-
import com.google.common.base.Preconditions;
20+
import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes
21+
.FAILED_TO_CHANGE_CONTAINER_STATE;
22+
23+
import java.io.IOException;
24+
import java.util.HashSet;
25+
import java.util.List;
26+
import java.util.Map;
27+
import java.util.NavigableSet;
28+
import java.util.Set;
29+
import java.util.concurrent.ConcurrentHashMap;
30+
import java.util.concurrent.atomic.AtomicLong;
2131

2232
import org.apache.hadoop.conf.Configuration;
2333
import org.apache.hadoop.conf.StorageUnit;
34+
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
35+
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent;
36+
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState;
37+
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
38+
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
2439
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
25-
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
2640
import org.apache.hadoop.hdds.scm.container.states.ContainerState;
2741
import org.apache.hadoop.hdds.scm.container.states.ContainerStateMap;
2842
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
43+
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
2944
import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
3045
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
31-
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
32-
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent;
33-
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState;
34-
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
35-
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
3646
import org.apache.hadoop.ozone.common.statemachine
3747
.InvalidStateTransitionException;
3848
import org.apache.hadoop.ozone.common.statemachine.StateMachine;
3949
import org.apache.hadoop.util.Time;
40-
4150
import org.slf4j.Logger;
4251
import org.slf4j.LoggerFactory;
4352

44-
import java.io.IOException;
45-
import java.util.HashSet;
46-
import java.util.List;
47-
import java.util.Map;
48-
import java.util.NavigableSet;
49-
import java.util.Set;
50-
import java.util.concurrent.ConcurrentHashMap;
51-
import java.util.concurrent.atomic.AtomicLong;
52-
53-
import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes
54-
.FAILED_TO_CHANGE_CONTAINER_STATE;
53+
import com.google.common.base.Preconditions;
54+
import com.google.common.util.concurrent.AtomicLongMap;
5555

5656
/**
5757
* A container state manager keeps track of container states and returns
@@ -121,6 +121,8 @@ public class ContainerStateManager {
121121
private final ConcurrentHashMap<ContainerState, ContainerID> lastUsedMap;
122122
private final ContainerStateMap containers;
123123
private final AtomicLong containerCount;
124+
private final AtomicLongMap<LifeCycleState> containerStateCount =
125+
AtomicLongMap.create();
124126

125127
/**
126128
* Constructs a Container State Manager that tracks all containers owned by
@@ -224,11 +226,12 @@ private void initializeStateMachine() {
224226
LifeCycleEvent.CLEANUP);
225227
}
226228

227-
void loadContainer(final ContainerInfo containerInfo)
228-
throws SCMException {
229+
230+
void loadContainer(final ContainerInfo containerInfo) throws SCMException {
229231
containers.addContainer(containerInfo);
230232
containerCount.set(Long.max(
231233
containerInfo.getContainerID(), containerCount.get()));
234+
containerStateCount.incrementAndGet(containerInfo.getState());
232235
}
233236

234237
/**
@@ -297,6 +300,7 @@ ContainerInfo allocateContainer(
297300
ContainerID.valueof(containerID));
298301
Preconditions.checkNotNull(containerInfo);
299302
containers.addContainer(containerInfo);
303+
containerStateCount.incrementAndGet(containerInfo.getState());
300304
LOG.trace("New container allocated: {}", containerInfo);
301305
return containerInfo;
302306
}
@@ -317,6 +321,8 @@ ContainerInfo updateContainerState(final ContainerID containerID,
317321
final LifeCycleState newState = stateMachine.getNextState(
318322
info.getState(), event);
319323
containers.updateState(containerID, info.getState(), newState);
324+
containerStateCount.incrementAndGet(newState);
325+
containerStateCount.decrementAndGet(info.getState());
320326
return containers.getContainerInfo(containerID);
321327
} catch (InvalidStateTransitionException ex) {
322328
String error = String.format("Failed to update container state %s, " +
@@ -440,6 +446,16 @@ Set<ContainerID> getContainerIDsByState(final LifeCycleState state) {
440446
return containers.getContainerIDsByState(state);
441447
}
442448

449+
/**
450+
* Get count of containers in the current {@link LifeCycleState}.
451+
*
452+
* @param state {@link LifeCycleState}
453+
* @return Count of containers
454+
*/
455+
Integer getContainerCountByState(final LifeCycleState state) {
456+
return Long.valueOf(containerStateCount.get(state)).intValue();
457+
}
458+
443459
/**
444460
* Returns a set of ContainerIDs that match the Container.
445461
*
@@ -467,8 +483,6 @@ ContainerInfo getContainer(final ContainerID containerID)
467483
return containers.getContainerInfo(containerID);
468484
}
469485

470-
471-
472486
void close() throws IOException {
473487
}
474488

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/SCMContainerManager.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,16 @@ public List<ContainerInfo> getContainers(LifeCycleState state) {
176176
}
177177
}
178178

179+
/**
180+
* Get number of containers in the given state.
181+
*
182+
* @param state {@link LifeCycleState}
183+
* @return Count
184+
*/
185+
public Integer getContainerCountByState(LifeCycleState state) {
186+
return containerStateManager.getContainerCountByState(state);
187+
}
188+
179189
/**
180190
* {@inheritDoc}
181191
*/
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
* <p>
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
* <p>
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hdds.scm.server;
19+
20+
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSED;
21+
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSING;
22+
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.DELETED;
23+
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.DELETING;
24+
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.OPEN;
25+
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.QUASI_CLOSED;
26+
27+
import java.util.Map;
28+
29+
import org.apache.hadoop.classification.InterfaceAudience;
30+
import org.apache.hadoop.metrics2.MetricsCollector;
31+
import org.apache.hadoop.metrics2.MetricsSource;
32+
import org.apache.hadoop.metrics2.MetricsSystem;
33+
import org.apache.hadoop.metrics2.annotation.Metrics;
34+
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
35+
import org.apache.hadoop.metrics2.lib.Interns;
36+
37+
/**
38+
* Metrics source to report number of containers in different states.
39+
*/
40+
@InterfaceAudience.Private
41+
@Metrics(about = "SCM Container Manager Metrics", context = "ozone")
42+
public class SCMContainerMetrics implements MetricsSource {
43+
44+
private final SCMMXBean scmmxBean;
45+
private static final String SOURCE = SCMContainerMetrics.class.getName();
46+
47+
public SCMContainerMetrics(SCMMXBean scmmxBean) {
48+
this.scmmxBean = scmmxBean;
49+
}
50+
51+
public static SCMContainerMetrics create(SCMMXBean scmmxBean) {
52+
MetricsSystem ms = DefaultMetricsSystem.instance();
53+
return ms.register(SOURCE, "Storage " +
54+
"Container Manager Metrics", new SCMContainerMetrics(scmmxBean));
55+
}
56+
57+
public void unRegister() {
58+
MetricsSystem ms = DefaultMetricsSystem.instance();
59+
ms.unregisterSource(SOURCE);
60+
}
61+
62+
@Override
63+
@SuppressWarnings("SuspiciousMethodCalls")
64+
public void getMetrics(MetricsCollector collector, boolean all) {
65+
Map<String, Integer> stateCount = scmmxBean.getContainerStateCount();
66+
67+
collector.addRecord(SOURCE)
68+
.addGauge(Interns.info("OpenContainers",
69+
"Number of open containers"),
70+
stateCount.get(OPEN.toString()))
71+
.addGauge(Interns.info("ClosingContainers",
72+
"Number of containers in closing state"),
73+
stateCount.get(CLOSING.toString()))
74+
.addGauge(Interns.info("QuasiClosedContainers",
75+
"Number of containers in quasi closed state"),
76+
stateCount.get(QUASI_CLOSED.toString()))
77+
.addGauge(Interns.info("ClosedContainers",
78+
"Number of containers in closed state"),
79+
stateCount.get(CLOSED.toString()))
80+
.addGauge(Interns.info("DeletingContainers",
81+
"Number of containers in deleting state"),
82+
stateCount.get(DELETING.toString()))
83+
.addGauge(Interns.info("DeletedContainers",
84+
"Number of containers in deleted state"),
85+
stateCount.get(DELETED.toString()));
86+
}
87+
}

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
162162
private final SCMDatanodeProtocolServer datanodeProtocolServer;
163163
private final SCMBlockProtocolServer blockProtocolServer;
164164
private final SCMClientProtocolServer clientProtocolServer;
165-
private SCMSecurityProtocolServer securityProtocolServer;
165+
private SCMSecurityProtocolServer securityProtocolServer;
166166

167167
/*
168168
* State Managers of SCM.
@@ -206,6 +206,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
206206
private JvmPauseMonitor jvmPauseMonitor;
207207
private final OzoneConfiguration configuration;
208208
private final ChillModeHandler chillModeHandler;
209+
private SCMContainerMetrics scmContainerMetrics;
209210

210211
/**
211212
* Creates a new StorageContainerManager. Configuration will be
@@ -239,7 +240,7 @@ public StorageContainerManager(OzoneConfiguration conf,
239240
Objects.requireNonNull(conf, "configuration cannot not be null");
240241

241242
configuration = conf;
242-
StorageContainerManager.initMetrics();
243+
initMetrics();
243244
initContainerReportCache(conf);
244245
/**
245246
* It is assumed the scm --init command creates the SCM Storage Config.
@@ -366,6 +367,7 @@ public StorageContainerManager(OzoneConfiguration conf,
366367
eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler);
367368
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, chillModeHandler);
368369
registerMXBean();
370+
registerMetricsSource(this);
369371
}
370372

371373
/**
@@ -841,6 +843,10 @@ private void registerMXBean() {
841843
jmxProperties, this);
842844
}
843845

846+
private void registerMetricsSource(SCMMXBean scmMBean) {
847+
scmContainerMetrics = SCMContainerMetrics.create(scmMBean);
848+
}
849+
844850
private void unregisterMXBean() {
845851
if (this.scmInfoBeanName != null) {
846852
MBeans.unregister(this.scmInfoBeanName);
@@ -999,6 +1005,10 @@ public void stop() {
9991005
}
10001006

10011007
unregisterMXBean();
1008+
if (scmContainerMetrics != null) {
1009+
scmContainerMetrics.unRegister();
1010+
}
1011+
10021012
// Event queue must be stopped before the DB store is closed at the end.
10031013
try {
10041014
LOG.info("Stopping SCM Event Queue.");
@@ -1195,8 +1205,8 @@ public double getCurrentContainerThreshold() {
11951205
public Map<String, Integer> getContainerStateCount() {
11961206
Map<String, Integer> nodeStateCount = new HashMap<>();
11971207
for (HddsProtos.LifeCycleState state : HddsProtos.LifeCycleState.values()) {
1198-
nodeStateCount.put(state.toString(), containerManager.getContainers(
1199-
state).size());
1208+
nodeStateCount.put(state.toString(),
1209+
containerManager.getContainerCountByState(state));
12001210
}
12011211
return nodeStateCount;
12021212
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
* <p>
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
* <p>
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hdds.scm.server;
19+
20+
import static org.mockito.Matchers.any;
21+
import static org.mockito.Matchers.anyInt;
22+
import static org.mockito.Matchers.anyString;
23+
import static org.mockito.Mockito.mock;
24+
import static org.mockito.Mockito.times;
25+
import static org.mockito.Mockito.verify;
26+
import static org.mockito.Mockito.when;
27+
28+
import java.util.HashMap;
29+
import java.util.Map;
30+
31+
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
32+
import org.apache.hadoop.metrics2.MetricsCollector;
33+
import org.apache.hadoop.metrics2.MetricsInfo;
34+
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
35+
import org.apache.hadoop.metrics2.lib.Interns;
36+
import org.junit.Test;
37+
38+
/**
39+
* Test metrics that represent container states.
40+
*/
41+
public class TestSCMContainerMetrics {
42+
@Test
43+
public void testSCMContainerMetrics() {
44+
SCMMXBean scmmxBean = mock(SCMMXBean.class);
45+
46+
Map<String, Integer> stateInfo = new HashMap<String, Integer>() {{
47+
put(HddsProtos.LifeCycleState.OPEN.toString(), 2);
48+
put(HddsProtos.LifeCycleState.CLOSING.toString(), 3);
49+
put(HddsProtos.LifeCycleState.QUASI_CLOSED.toString(), 4);
50+
put(HddsProtos.LifeCycleState.CLOSED.toString(), 5);
51+
put(HddsProtos.LifeCycleState.DELETING.toString(), 6);
52+
put(HddsProtos.LifeCycleState.DELETED.toString(), 7);
53+
}};
54+
55+
56+
when(scmmxBean.getContainerStateCount()).thenReturn(stateInfo);
57+
58+
MetricsRecordBuilder mb = mock(MetricsRecordBuilder.class);
59+
when(mb.addGauge(any(MetricsInfo.class), anyInt())).thenReturn(mb);
60+
61+
MetricsCollector metricsCollector = mock(MetricsCollector.class);
62+
when(metricsCollector.addRecord(anyString())).thenReturn(mb);
63+
64+
SCMContainerMetrics containerMetrics = new SCMContainerMetrics(scmmxBean);
65+
66+
containerMetrics.getMetrics(metricsCollector, true);
67+
68+
verify(mb, times(1)).addGauge(Interns.info("OpenContainers",
69+
"Number of open containers"), 2);
70+
verify(mb, times(1)).addGauge(Interns.info("ClosingContainers",
71+
"Number of containers in closing state"), 3);
72+
verify(mb, times(1)).addGauge(Interns.info("QuasiClosedContainers",
73+
"Number of containers in quasi closed state"), 4);
74+
verify(mb, times(1)).addGauge(Interns.info("ClosedContainers",
75+
"Number of containers in closed state"), 5);
76+
verify(mb, times(1)).addGauge(Interns.info("DeletingContainers",
77+
"Number of containers in deleting state"), 6);
78+
verify(mb, times(1)).addGauge(Interns.info("DeletedContainers",
79+
"Number of containers in deleted state"), 7);
80+
}
81+
}

0 commit comments

Comments
 (0)