Skip to content

Commit fb2a10e

Browse files
Mehakmeet Singh authored and steveloughran committed
HADOOP-18257. S3AAuditLogMergerAndParser changes
* dest is a path to the avro file to create containing all of the files parsed.
* tests updated with changed API
Change-Id: I810b165a4c6d2354bc91523594c7a24a4c031aee

HADOOP-18257. audit log: ParsedAuditLogEntry
Change-Id: Iba0d57357cc04aad571f9f0f4d69ee7f1dd11712

HADOOP-18257. audit log
* moved avro source
* WiP of a serializable/writable object for easy use in MR, spark
Change-Id: I56bbff0ad2d71b64984f4cc59e1f2c83ea25597a

HADOOP-18257. spotbugs
Change-Id: Ia0ab414bcca8a898eca95fe9ddd89b5bfcef3028

HADOOP-18257 audit log parser
* tool is invoked through hadoop s3guard command
* which can now also be invoked as "hadoop s3a"!
* tests are improved.
* OperationDuration implements DurationTracker for bit more completeness

TODO
* split out the record parsing into a hadoop MR record read/write
* so support large scale parsing
* cli tool just glues that together either for small parallelised extraction or for aggregation to one file.
* but a bulk job would work with a larger dataset
Change-Id: I25e333592d1058b460b0bfda5313a20de13c2e35

HADOOP-18257 audit log parser
Initial round of changes
* only log at debug during parsing
* cli to take <path to source> <path of output file> where the output is a filename not a dir.
Change-Id: Ibdba37e42a36b1933979d2a1e7265d07069ea0bd

HADOOP-18257. Merging and Parsing S3A audit logs into Avro format for analysis.
1 parent 317db31 commit fb2a10e

17 files changed

Lines changed: 1808 additions & 3 deletions

File tree

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DurationInfo.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ public String toString() {
9393

9494
@Override
9595
public void close() {
96-
finished();
96+
super.close();
9797
if (logAtInfo) {
9898
log.info("{}", this);
9999
} else {

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/OperationDuration.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,14 @@
2222

2323
import org.apache.hadoop.classification.InterfaceAudience;
2424
import org.apache.hadoop.classification.InterfaceStability;
25+
import org.apache.hadoop.fs.statistics.DurationTracker;
2526

2627
/**
2728
* Little duration counter.
2829
*/
2930
@InterfaceAudience.Public
3031
@InterfaceStability.Unstable
31-
public class OperationDuration {
32+
public class OperationDuration implements DurationTracker {
3233

3334
/**
3435
* Time in millis when the operation started.
@@ -65,6 +66,16 @@ public void finished() {
6566
finished = time();
6667
}
6768

69+
@Override
70+
public void failed() {
71+
finished();
72+
}
73+
74+
@Override
75+
public void close() {
76+
finished();
77+
}
78+
6879
/**
6980
* Return the duration as {@link #humanTime(long)}.
7081
* @return a printable duration.

hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,4 +86,9 @@
8686
<Method name="submit"/>
8787
<Bug pattern="RV_RETURN_VALUE_IGNORED_BAD_PRACTICE"/>
8888
</Match>
89+
90+
<Match>
91+
<Class name="org.apache.hadoop.fs.s3a.audit.AvroS3LogEntryRecord"/>
92+
<Bug pattern="NP_NULL_INSTANCEOF"/>
93+
</Match>
8994
</FindBugsFilter>

hadoop-tools/hadoop-aws/pom.xml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,7 @@
460460
<exclusion>org.apache.hadoop.fs.s3a.commit.impl.*</exclusion>
461461
<exclusion>org.apache.hadoop.fs.s3a.commit.magic.*</exclusion>
462462
<exclusion>org.apache.hadoop.fs.s3a.commit.staging.*</exclusion>
463+
<exclusion>org.apache.hadoop.fs.s3a.audit.mapreduce.*</exclusion>
463464
</exclusions>
464465
<bannedImports>
465466
<bannedImport>org.apache.hadoop.mapreduce.**</bannedImport>
@@ -481,6 +482,25 @@
481482
</execution>
482483
</executions>
483484
</plugin>
485+
486+
<!-- create the avro records for the audit log parser -->
487+
<plugin>
488+
<groupId>org.apache.avro</groupId>
489+
<artifactId>avro-maven-plugin</artifactId>
490+
<executions>
491+
<execution>
492+
<id>generate-avro-sources</id>
493+
<phase>generate-sources</phase>
494+
<goals>
495+
<goal>schema</goal>
496+
</goals>
497+
</execution>
498+
</executions>
499+
<configuration>
500+
<sourceDirectory>src/main/avro</sourceDirectory>
501+
<outputDirectory>${project.build.directory}/generated-sources/avro</outputDirectory>
502+
</configuration>
503+
</plugin>
484504
</plugins>
485505
</build>
486506

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
{
18+
"type" : "record", "name" : "AvroS3LogEntryRecord",
19+
"namespace" : "org.apache.hadoop.fs.s3a.audit",
20+
"fields" : [
21+
{ "name" : "owner", "type" : "string" },
22+
{ "name" : "bucket", "type" : "string" },
23+
{ "name" : "timestamp", "type" : "string" },
24+
{ "name" : "remoteip", "type" : "string" },
25+
{ "name" : "requester", "type" : "string" },
26+
{ "name" : "requestid", "type" : "string" },
27+
{ "name" : "verb", "type" : "string" },
28+
{ "name" : "key", "type" : "string" },
29+
{ "name" : "requesturi", "type" : "string" },
30+
{ "name" : "http", "type" : "string" },
31+
{ "name" : "awserrorcode", "type" : "string" },
32+
{ "name" : "bytessent", "type" : ["long", "null"] },
33+
{ "name" : "objectsize", "type" : ["long", "null"] },
34+
{ "name" : "totaltime", "type" : ["long", "null"] },
35+
{ "name" : "turnaroundtime" , "type" : ["long", "null"] },
36+
{ "name" : "referrer", "type" : "string" },
37+
{ "name" : "useragent", "type" : "string" },
38+
{ "name" : "version", "type" : "string" },
39+
{ "name" : "hostid", "type" : "string" },
40+
{ "name" : "sigv", "type" : "string" },
41+
{ "name" : "cypher", "type" : "string" },
42+
{ "name" : "auth", "type" : "string" },
43+
{ "name" : "endpoint", "type" : "string" },
44+
{ "name" : "tls", "type" : "string" },
45+
{ "name" : "tail", "type" : "string" },
46+
{ "name" : "referrerMap", "type" : {"type": "map", "values": "string"} }
47+
]
48+
}
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.fs.s3a.audit;
20+
21+
import java.io.IOException;
22+
import java.io.PrintStream;
23+
import java.util.Arrays;
24+
import java.util.List;
25+
26+
import org.slf4j.Logger;
27+
import org.slf4j.LoggerFactory;
28+
29+
import org.apache.hadoop.conf.Configuration;
30+
import org.apache.hadoop.fs.FileSystem;
31+
import org.apache.hadoop.fs.Path;
32+
import org.apache.hadoop.fs.s3a.audit.mapreduce.S3AAuditLogMergerAndParser;
33+
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool;
34+
import org.apache.hadoop.util.ExitUtil;
35+
36+
import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
37+
import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_FAIL;
38+
import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;
39+
40+
/**
41+
* AuditTool is a Command Line Interface.
42+
* Its functionality is to parse the audit log files
43+
* and generate avro file.
44+
*/
45+
public class AuditTool extends S3GuardTool {
46+
47+
private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);
48+
49+
/**
50+
* Name of audit tool: {@value}.
51+
*/
52+
public static final String AUDIT = "audit";
53+
54+
55+
/**
56+
* Name of this tool: {@value}.
57+
*/
58+
public static final String AUDIT_TOOL =
59+
"org.apache.hadoop.fs.s3a.audit.AuditTool";
60+
61+
/**
62+
* Purpose of this tool: {@value}.
63+
*/
64+
public static final String PURPOSE =
65+
"\n\nUSAGE:\nMerge and parse audit log files and convert into avro files "
66+
+ "for "
67+
+ "better "
68+
+ "visualization";
69+
70+
// Exit codes
71+
private static final int SUCCESS = EXIT_SUCCESS;
72+
73+
private static final int FAILURE = EXIT_FAIL;
74+
75+
private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;
76+
77+
private static final int SAMPLE = 500;
78+
79+
private static final String USAGE =
80+
"hadoop " + AUDIT_TOOL +
81+
" <path of source files>" +
82+
" <path of output file>"
83+
+ "\n";
84+
85+
private PrintStream out;
86+
87+
public AuditTool(final Configuration conf) {
88+
super(conf);
89+
}
90+
91+
/**
92+
* Tells us the usage of the AuditTool by commands.
93+
* @return the string USAGE
94+
*/
95+
public String getUsage() {
96+
return USAGE + PURPOSE;
97+
}
98+
99+
public String getName() {
100+
return AUDIT_TOOL;
101+
}
102+
103+
/**
104+
* This run method in AuditTool takes source and destination path of bucket,
105+
* and checks if there are directories and pass these paths to merge and
106+
* parse audit log files.
107+
* @param args argument list
108+
* @param stream output stream
109+
* @return SUCCESS i.e, '0', which is an exit code
110+
* @throws Exception on any failure.
111+
*/
112+
@Override
113+
public int run(final String[] args, final PrintStream stream)
114+
throws ExitUtil.ExitException, Exception {
115+
116+
this.out = stream;
117+
118+
preConditionArgsSizeCheck(args);
119+
List<String> paths = Arrays.asList(args);
120+
121+
// Path of audit log files
122+
Path logsPath = new Path(paths.get(0));
123+
124+
// Path of destination file
125+
Path destPath = new Path(paths.get(1));
126+
127+
final S3AAuditLogMergerAndParser auditLogMergerAndParser =
128+
new S3AAuditLogMergerAndParser(getConf(), SAMPLE);
129+
130+
// Calls S3AAuditLogMergerAndParser for implementing merging, passing of
131+
// audit log files and converting into avro file
132+
boolean mergeAndParseResult =
133+
auditLogMergerAndParser.mergeAndParseAuditLogFiles(
134+
logsPath, destPath);
135+
if (!mergeAndParseResult) {
136+
return FAILURE;
137+
}
138+
139+
return SUCCESS;
140+
}
141+
142+
private void preConditionArgsSizeCheck(String[] args) {
143+
if (args.length != 2) {
144+
errorln(getUsage());
145+
throw invalidArgs("Invalid number of arguments");
146+
}
147+
}
148+
149+
150+
/**
151+
* Flush all active output channels, including {@code System.err},
152+
* so as to stay in sync with any JRE log messages.
153+
*/
154+
private void flush() {
155+
if (out != null) {
156+
out.flush();
157+
} else {
158+
System.out.flush();
159+
}
160+
System.err.flush();
161+
}
162+
163+
164+
public void closeOutput() throws IOException {
165+
flush();
166+
if (out != null) {
167+
out.close();
168+
}
169+
}
170+
171+
}

0 commit comments

Comments
 (0)