Skip to content

Commit 1f7465f

Browse files
authored
TEZ-3331: Add operation specific HDFS counters for Tez UI (#379) (Laszlo Bodor reviewed by Ayush Saxena)
1 parent d84fdca commit 1f7465f

7 files changed

Lines changed: 288 additions & 92 deletions

File tree

tez-api/src/main/java/org/apache/tez/common/counters/FileSystemCounter.java

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,68 @@
1919
package org.apache.tez.common.counters;
2020

2121
import org.apache.hadoop.classification.InterfaceAudience.Private;
22+
import org.apache.hadoop.fs.StorageStatistics.CommonStatisticNames;
2223

24+
/**
25+
* FileSystemCounter is an enum for defining which filesystem/storage statistics are exposed in Tez.
26+
*/
2327
@Private
2428
public enum FileSystemCounter {
25-
BYTES_READ,
26-
BYTES_WRITTEN,
27-
READ_OPS,
28-
LARGE_READ_OPS,
29-
WRITE_OPS,
30-
HDFS_BYTES_READ,
31-
HDFS_BYTES_WRITTEN,
32-
FILE_BYTES_READ,
33-
FILE_BYTES_WRITTEN
29+
BYTES_READ("bytesRead"),
30+
BYTES_WRITTEN("bytesWritten"),
31+
READ_OPS("readOps"),
32+
LARGE_READ_OPS("largeReadOps"),
33+
WRITE_OPS("writeOps"),
34+
35+
// Additional counters from HADOOP-13305
36+
OP_APPEND(CommonStatisticNames.OP_APPEND),
37+
OP_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE),
38+
OP_CREATE(CommonStatisticNames.OP_CREATE),
39+
OP_CREATE_NON_RECURSIVE(CommonStatisticNames.OP_CREATE_NON_RECURSIVE),
40+
OP_DELETE(CommonStatisticNames.OP_DELETE),
41+
OP_EXISTS(CommonStatisticNames.OP_EXISTS),
42+
OP_GET_CONTENT_SUMMARY(CommonStatisticNames.OP_GET_CONTENT_SUMMARY),
43+
OP_GET_DELEGATION_TOKEN(CommonStatisticNames.OP_GET_DELEGATION_TOKEN),
44+
OP_GET_FILE_CHECKSUM(CommonStatisticNames.OP_GET_FILE_CHECKSUM),
45+
OP_GET_FILE_STATUS(CommonStatisticNames.OP_GET_FILE_STATUS),
46+
OP_GET_STATUS(CommonStatisticNames.OP_GET_STATUS),
47+
OP_GLOB_STATUS(CommonStatisticNames.OP_GLOB_STATUS),
48+
OP_IS_FILE(CommonStatisticNames.OP_IS_FILE),
49+
OP_IS_DIRECTORY(CommonStatisticNames.OP_IS_DIRECTORY),
50+
OP_LIST_FILES(CommonStatisticNames.OP_LIST_FILES),
51+
OP_LIST_LOCATED_STATUS(CommonStatisticNames.OP_LIST_LOCATED_STATUS),
52+
OP_LIST_STATUS(CommonStatisticNames.OP_LIST_STATUS),
53+
OP_MKDIRS(CommonStatisticNames.OP_MKDIRS),
54+
OP_MODIFY_ACL_ENTRIES(CommonStatisticNames.OP_MODIFY_ACL_ENTRIES),
55+
OP_OPEN(CommonStatisticNames.OP_OPEN),
56+
OP_REMOVE_ACL(CommonStatisticNames.OP_REMOVE_ACL),
57+
OP_REMOVE_ACL_ENTRIES(CommonStatisticNames.OP_REMOVE_ACL_ENTRIES),
58+
OP_REMOVE_DEFAULT_ACL(CommonStatisticNames.OP_REMOVE_DEFAULT_ACL),
59+
OP_RENAME(CommonStatisticNames.OP_RENAME),
60+
OP_SET_ACL(CommonStatisticNames.OP_SET_ACL),
61+
OP_SET_OWNER(CommonStatisticNames.OP_SET_OWNER),
62+
OP_SET_PERMISSION(CommonStatisticNames.OP_SET_PERMISSION),
63+
OP_SET_TIMES(CommonStatisticNames.OP_SET_TIMES),
64+
OP_TRUNCATE(CommonStatisticNames.OP_TRUNCATE),
65+
66+
// The counters below are not needed in production, because the scheme_countername expansion is taken care of by
67+
// the FileSystemCounterGroup; they are kept here only because some analyzers still depend on them.
68+
@Deprecated
69+
HDFS_BYTES_READ("hdfsBytesRead"),
70+
@Deprecated
71+
HDFS_BYTES_WRITTEN("hdfsBytesWritten"),
72+
@Deprecated
73+
FILE_BYTES_READ("fileBytesRead"),
74+
@Deprecated
75+
FILE_BYTES_WRITTEN("fileBytesWritten");
76+
77+
private final String opName;
78+
79+
FileSystemCounter(String opName) {
80+
this.opName = opName;
81+
}
82+
83+
public String getOpName() {
84+
return opName;
85+
}
3486
}

tez-runtime-internals/pom.xml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,23 @@
4949
<groupId>org.apache.hadoop</groupId>
5050
<artifactId>hadoop-common</artifactId>
5151
</dependency>
52+
<dependency>
53+
<groupId>org.apache.hadoop</groupId>
54+
<artifactId>hadoop-common</artifactId>
55+
<scope>test</scope>
56+
<type>test-jar</type>
57+
</dependency>
58+
<dependency>
59+
<groupId>org.apache.hadoop</groupId>
60+
<artifactId>hadoop-hdfs</artifactId>
61+
<scope>test</scope>
62+
</dependency>
63+
<dependency>
64+
<groupId>org.apache.hadoop</groupId>
65+
<artifactId>hadoop-hdfs</artifactId>
66+
<scope>test</scope>
67+
<type>test-jar</type>
68+
</dependency>
5269
<dependency>
5370
<groupId>org.apache.hadoop</groupId>
5471
<artifactId>hadoop-yarn-api</artifactId>

tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/FileSystemStatisticUpdater.java

Lines changed: 14 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
* licenses this file to you under the Apache License, Version 2.0 (the
66
* "License"); you may not use this file except in compliance with the License.
77
* You may obtain a copy of the License at
8-
*
8+
*
99
* http://www.apache.org/licenses/LICENSE-2.0
10-
*
10+
*
1111
* Unless required by applicable law or agreed to in writing, software
1212
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
1313
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
@@ -17,9 +17,7 @@
1717

1818
package org.apache.tez.runtime.metrics;
1919

20-
import java.util.List;
21-
22-
import org.apache.hadoop.fs.FileSystem;
20+
import org.apache.hadoop.fs.StorageStatistics;
2321
import org.apache.tez.common.counters.FileSystemCounter;
2422
import org.apache.tez.common.counters.TezCounter;
2523
import org.apache.tez.common.counters.TezCounters;
@@ -30,50 +28,22 @@
3028
*/
3129
public class FileSystemStatisticUpdater {
3230

33-
private List<FileSystem.Statistics> stats;
34-
private TezCounter readBytesCounter, writeBytesCounter, readOpsCounter, largeReadOpsCounter,
35-
writeOpsCounter;
36-
private String scheme;
37-
private TezCounters counters;
31+
private final StorageStatistics stats;
32+
private final TezCounters counters;
3833

39-
FileSystemStatisticUpdater(TezCounters counters, List<FileSystem.Statistics> stats, String scheme) {
40-
this.stats = stats;
41-
this.scheme = scheme;
34+
FileSystemStatisticUpdater(TezCounters counters, StorageStatistics storageStatistics) {
35+
this.stats = storageStatistics;
4236
this.counters = counters;
4337
}
4438

4539
void updateCounters() {
46-
if (readBytesCounter == null) {
47-
readBytesCounter = counters.findCounter(scheme, FileSystemCounter.BYTES_READ);
48-
}
49-
if (writeBytesCounter == null) {
50-
writeBytesCounter = counters.findCounter(scheme, FileSystemCounter.BYTES_WRITTEN);
51-
}
52-
if (readOpsCounter == null) {
53-
readOpsCounter = counters.findCounter(scheme, FileSystemCounter.READ_OPS);
54-
}
55-
if (largeReadOpsCounter == null) {
56-
largeReadOpsCounter = counters.findCounter(scheme, FileSystemCounter.LARGE_READ_OPS);
57-
}
58-
if (writeOpsCounter == null) {
59-
writeOpsCounter = counters.findCounter(scheme, FileSystemCounter.WRITE_OPS);
60-
}
61-
long readBytes = 0;
62-
long writeBytes = 0;
63-
long readOps = 0;
64-
long largeReadOps = 0;
65-
long writeOps = 0;
66-
for (FileSystem.Statistics stat : stats) {
67-
readBytes = readBytes + stat.getBytesRead();
68-
writeBytes = writeBytes + stat.getBytesWritten();
69-
readOps = readOps + stat.getReadOps();
70-
largeReadOps = largeReadOps + stat.getLargeReadOps();
71-
writeOps = writeOps + stat.getWriteOps();
40+
// loop through FileSystemCounter enums as it is a smaller set
41+
for (FileSystemCounter fsCounter : FileSystemCounter.values()) {
42+
Long val = stats.getLong(fsCounter.getOpName());
43+
if (val != null && val != 0) {
44+
TezCounter counter = counters.findCounter(stats.getScheme(), fsCounter);
45+
counter.setValue(val);
46+
}
7247
}
73-
readBytesCounter.setValue(readBytes);
74-
writeBytesCounter.setValue(writeBytes);
75-
readOpsCounter.setValue(readOps);
76-
largeReadOpsCounter.setValue(largeReadOps);
77-
writeOpsCounter.setValue(writeOps);
7848
}
7949
}

tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/TaskCounterUpdater.java

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,17 @@
1818

1919
package org.apache.tez.runtime.metrics;
2020

21-
import java.util.ArrayList;
2221
import java.util.HashMap;
23-
import java.util.List;
22+
import java.util.Iterator;
2423
import java.util.Map;
2524

25+
import org.apache.hadoop.fs.GlobalStorageStatistics;
26+
import org.apache.hadoop.fs.StorageStatistics;
2627
import org.apache.tez.util.TezMxBeanResourceCalculator;
2728
import org.slf4j.Logger;
2829
import org.slf4j.LoggerFactory;
2930
import org.apache.hadoop.conf.Configuration;
3031
import org.apache.hadoop.fs.FileSystem;
31-
import org.apache.hadoop.fs.FileSystem.Statistics;
3232
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
3333
import org.apache.tez.common.GcTimeUpdater;
3434
import org.apache.tez.common.counters.TaskCounter;
@@ -49,10 +49,9 @@ public class TaskCounterUpdater {
4949
private final Configuration conf;
5050

5151
/**
52-
* A Map where Key-> URIScheme and value->FileSystemStatisticUpdater
52+
* A Map where Key-> URIScheme and value-> {@code Map<StorageStatistics name, FileSystemStatisticUpdater>}
5353
*/
54-
private Map<String, FileSystemStatisticUpdater> statisticUpdaters =
55-
new HashMap<String, FileSystemStatisticUpdater>();
54+
private final Map<String, Map<String, FileSystemStatisticUpdater>> statisticUpdaters = new HashMap<>();
5655
protected final GcTimeUpdater gcUpdater;
5756
private ResourceCalculatorProcessTree pTree;
5857
private long initCpuCumulativeTime = 0;
@@ -67,34 +66,18 @@ public TaskCounterUpdater(TezCounters counters, Configuration conf, String pid)
6766
recordInitialCpuStats();
6867
}
6968

70-
69+
7170
public void updateCounters() {
72-
// FileSystemStatistics are reset each time a new task is seen by the
73-
// container.
74-
// This doesn't remove the fileSystem, and does not clear all statistics -
75-
// so there is a potential of an unused FileSystem showing up for a
76-
// Container, and strange values for READ_OPS etc.
77-
Map<String, List<FileSystem.Statistics>> map = new
78-
HashMap<String, List<FileSystem.Statistics>>();
79-
for(Statistics stat: FileSystem.getAllStatistics()) {
80-
String uriScheme = stat.getScheme();
81-
if (map.containsKey(uriScheme)) {
82-
List<FileSystem.Statistics> list = map.get(uriScheme);
83-
list.add(stat);
84-
} else {
85-
List<FileSystem.Statistics> list = new ArrayList<FileSystem.Statistics>();
86-
list.add(stat);
87-
map.put(uriScheme, list);
88-
}
89-
}
90-
for (Map.Entry<String, List<FileSystem.Statistics>> entry: map.entrySet()) {
91-
FileSystemStatisticUpdater updater = statisticUpdaters.get(entry.getKey());
92-
if(updater==null) {//new FileSystem has been found in the cache
93-
updater =
94-
new FileSystemStatisticUpdater(tezCounters, entry.getValue(),
95-
entry.getKey());
96-
statisticUpdaters.put(entry.getKey(), updater);
97-
}
71+
GlobalStorageStatistics globalStorageStatistics = FileSystem.getGlobalStorageStatistics();
72+
Iterator<StorageStatistics> iter = globalStorageStatistics.iterator();
73+
while (iter.hasNext()) {
74+
StorageStatistics stats = iter.next();
75+
// Fetch or initialize the updater set for the scheme
76+
Map<String, FileSystemStatisticUpdater> updaterSet = statisticUpdaters
77+
.computeIfAbsent(stats.getScheme(), k -> new HashMap<>());
78+
// Fetch or create the updater for the specific statistic
79+
FileSystemStatisticUpdater updater = updaterSet
80+
.computeIfAbsent(stats.getName(), k -> new FileSystemStatisticUpdater(tezCounters, stats));
9881
updater.updateCounters();
9982
}
10083

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
* <p/>
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
* <p/>
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.tez.runtime.metrics;
20+
21+
import java.io.IOException;
22+
23+
import org.apache.hadoop.conf.Configuration;
24+
import org.apache.hadoop.fs.FileSystem;
25+
import org.apache.hadoop.fs.Path;
26+
import org.apache.hadoop.hdfs.DFSTestUtil;
27+
import org.apache.hadoop.hdfs.MiniDFSCluster;
28+
import org.apache.tez.common.counters.FileSystemCounter;
29+
import org.apache.tez.common.counters.TezCounter;
30+
import org.apache.tez.common.counters.TezCounters;
31+
import org.junit.AfterClass;
32+
import org.junit.Assert;
33+
import org.junit.Before;
34+
import org.junit.BeforeClass;
35+
import org.junit.Test;
36+
import org.slf4j.Logger;
37+
import org.slf4j.LoggerFactory;
38+
39+
public class TestFileSystemStatisticUpdater {
40+
41+
private static final Logger LOG = LoggerFactory.getLogger(
42+
TestFileSystemStatisticUpdater.class);
43+
44+
private static MiniDFSCluster dfsCluster;
45+
46+
private static final Configuration CONF = new Configuration();
47+
private static FileSystem remoteFs;
48+
49+
private static final String TEST_ROOT_DIR = "target" + Path.SEPARATOR +
50+
TestFileSystemStatisticUpdater.class.getName() + "-tmpDir";
51+
52+
@BeforeClass
53+
public static void beforeClass() throws Exception {
54+
CONF.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR);
55+
}
56+
57+
@AfterClass
58+
public static void tearDown() {
59+
if (dfsCluster != null) {
60+
dfsCluster.shutdown();
61+
dfsCluster = null;
62+
}
63+
}
64+
65+
@Before
66+
public void setup() throws IOException {
67+
FileSystem.clearStatistics();
68+
try {
69+
// tear down the whole cluster before each test to completely get rid of file system statistics
70+
if (dfsCluster != null) {
71+
dfsCluster.shutdown();
72+
}
73+
dfsCluster = new MiniDFSCluster.Builder(CONF).numDataNodes(2).build();
74+
remoteFs = dfsCluster.getFileSystem();
75+
} catch (IOException io) {
76+
throw new RuntimeException("problem starting mini dfs cluster", io);
77+
}
78+
}
79+
80+
@Test
81+
public void basicTest() throws IOException {
82+
TezCounters counters = new TezCounters();
83+
TaskCounterUpdater updater = new TaskCounterUpdater(counters, CONF, "pid");
84+
85+
DFSTestUtil.writeFile(remoteFs, new Path("/tmp/foo/abc.txt"), "xyz");
86+
87+
updater.updateCounters();
88+
LOG.info("Counters (after first update): {}", counters);
89+
assertCounter(counters, FileSystemCounter.OP_MKDIRS, 0); // DFSTestUtil doesn't call separate mkdirs
90+
assertCounter(counters, FileSystemCounter.OP_CREATE, 1);
91+
assertCounter(counters, FileSystemCounter.BYTES_WRITTEN, 3); // "xyz"
92+
assertCounter(counters, FileSystemCounter.WRITE_OPS, 1);
93+
assertCounter(counters, FileSystemCounter.OP_GET_FILE_STATUS, 1); // DFSTestUtil calls fs.exists
94+
assertCounter(counters, FileSystemCounter.OP_CREATE, 1);
95+
96+
DFSTestUtil.writeFile(remoteFs, new Path("/tmp/foo/abc1.txt"), "xyz");
97+
98+
updater.updateCounters();
99+
LOG.info("Counters (after second update): {}", counters);
100+
assertCounter(counters, FileSystemCounter.OP_CREATE, 2);
101+
assertCounter(counters, FileSystemCounter.BYTES_WRITTEN, 6); // "xyz" has been written twice
102+
assertCounter(counters, FileSystemCounter.WRITE_OPS, 2);
103+
assertCounter(counters, FileSystemCounter.OP_GET_FILE_STATUS, 2); // DFSTestUtil calls fs.exists again
104+
assertCounter(counters, FileSystemCounter.OP_CREATE, 2);
105+
106+
// Update once more without performing any new filesystem operation
107+
updater.updateCounters();
108+
LOG.info("Counters (after third update): {}", counters);
109+
// counter holds its value when updateCounters runs again with no new filesystem activity
110+
assertCounter(counters, FileSystemCounter.OP_CREATE, 2);
111+
}
112+
113+
private void assertCounter(TezCounters counters, FileSystemCounter fsCounter, int value) {
114+
TezCounter counter = counters.findCounter(remoteFs.getScheme(), fsCounter);
115+
Assert.assertNotNull(counter);
116+
Assert.assertEquals(value, counter.getValue());
117+
}
118+
}

0 commit comments

Comments
 (0)