From 0be881dac5c851ac1b9609ff52a2368bf6bd0899 Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Thu, 14 Sep 2023 15:53:03 +0200 Subject: [PATCH 1/5] TEZ-4503: Warn about large conf properties in payload --- .../java/org/apache/tez/common/TezUtils.java | 23 +++++++++++++++++++ .../apache/tez/dag/api/TezConfiguration.java | 20 ++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/tez-api/src/main/java/org/apache/tez/common/TezUtils.java b/tez-api/src/main/java/org/apache/tez/common/TezUtils.java index 88920a4e53..cb6ff2bae4 100644 --- a/tez-api/src/main/java/org/apache/tez/common/TezUtils.java +++ b/tez-api/src/main/java/org/apache/tez/common/TezUtils.java @@ -29,6 +29,7 @@ import com.google.protobuf.ByteString; import com.google.protobuf.CodedInputStream; +import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.runtime.api.TaskContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,6 +44,11 @@ import org.xerial.snappy.SnappyInputStream; import org.xerial.snappy.SnappyOutputStream; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_MASK; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_MASK_DEFAULT; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT; + /** * Utility methods for setting up a DAG. Has helpers for setting up log4j configuration, converting * {@link org.apache.hadoop.conf.Configuration} to {@link org.apache.tez.dag.api.UserPayload} etc. @@ -51,6 +57,14 @@ public final class TezUtils { private static final Logger LOG = LoggerFactory.getLogger(TezUtils.class); + private static final int PROPERTY_THRESHOLD; + private static final boolean PROPERTY_MASK; + + static { + TezConfiguration c = new TezConfiguration(); + PROPERTY_THRESHOLD = c.getInt(TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD, TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT); + PROPERTY_MASK = c.getBoolean(TEZ_LOGGING_PROPERTY_MASK, TEZ_LOGGING_PROPERTY_MASK_DEFAULT); + } private TezUtils() {} @@ -211,10 +225,19 @@ public static void populateConfProtoFromEntries(Iterable PROPERTY_THRESHOLD) { + LOG.warn("Property '{}' is unusually big ({} bytes); large payload may lead to OOM.", key, value.length()); + if (!PROPERTY_MASK) { + LOG.warn("Large property '{}': {}", key, value); + } + } + } } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java index 9e2e2d89cf..f4cbc82b83 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java @@ -1580,6 +1580,26 @@ public TezConfiguration(boolean loadDefaults) { TEZ_PREFIX + "generate.debug.artifacts"; public static final boolean TEZ_GENERATE_DEBUG_ARTIFACTS_DEFAULT = false; + /** + * Int value. Property size threshold (in bytes) for logging during payload serialization. Properties exceeding the + * threshold are considered unusually large and potentially problematic thus they should be logged. + */ + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty(type="integer") + public static final String TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD = + TEZ_PREFIX + "logging.property.size.threshold"; + public static final int TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT = 512 * 1024; + /** + * Boolean value. Whether property masking is enabled for logging. Properties may contain sensitive user information + * such as passwords, credentials, secrets, etc., so they shouldn't be logged unconditionally. When masking is + * enabled, the property value (content) is not displayed in the logs. + */ + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty + public static final String TEZ_LOGGING_PROPERTY_MASK = + TEZ_PREFIX + "logging.property.mask"; + public static final boolean TEZ_LOGGING_PROPERTY_MASK_DEFAULT = true; + /** * Set of tasks for which specific launch command options need to be added. * Format: "vertexName[csv of task ids];vertexName[csv of task ids].." From 17e07e7c714d670cfbd5e15eb3c56f33a022f2ec Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Mon, 23 Oct 2023 15:16:58 +0200 Subject: [PATCH 2/5] Allow different logging threshold per configuration object Instead of using the central Tez configuration (tez-site.xml) for selecting the threshold and masking for logging large properties check for the respective entries in each individual configuration object. --- .../tez/common/LargePropertyLogger.java | 65 +++++++++++++++++++ .../java/org/apache/tez/common/TezUtils.java | 26 +------- .../main/java/org/apache/tez/dag/api/DAG.java | 5 +- 3 files changed, 71 insertions(+), 25 deletions(-) create mode 100644 tez-api/src/main/java/org/apache/tez/common/LargePropertyLogger.java diff --git a/tez-api/src/main/java/org/apache/tez/common/LargePropertyLogger.java b/tez-api/src/main/java/org/apache/tez/common/LargePropertyLogger.java new file mode 100644 index 0000000000..32f412b3f7 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/common/LargePropertyLogger.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.common; + +import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; + +import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_MASK; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_MASK_DEFAULT; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT; + +public class LargePropertyLogger { + private static final Logger LOG = LoggerFactory.getLogger(LargePropertyLogger.class); + private final int threshold; + private final boolean mask; + + public static LargePropertyLogger from(Configuration c) { + return new LargePropertyLogger( + c.getInt(TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD, TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT), + c.getBoolean(TEZ_LOGGING_PROPERTY_MASK, TEZ_LOGGING_PROPERTY_MASK_DEFAULT)); + } + + public static LargePropertyLogger from(Map c) { + String threshold = c.getOrDefault(TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD, + String.valueOf(TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT)); + //TODO Not really 100% equivalent with Conf factory + String mask = c.getOrDefault(TEZ_LOGGING_PROPERTY_MASK, String.valueOf(TEZ_LOGGING_PROPERTY_MASK_DEFAULT)); + return new LargePropertyLogger(Integer.parseInt(threshold), Boolean.valueOf(mask)); + } + + private LargePropertyLogger(int threshold, boolean mask) { + this.threshold = threshold; + this.mask = mask; + } + + public Map.Entry logEntry(Map.Entry e) { + String key = e.getKey(); + String value = e.getValue(); + if (value.length() > threshold) { + LOG.warn("Property '{}' is unusually big ({} bytes); large payload may lead to OOM.", key, value.length()); + if (!mask) { + LOG.warn("Large property '{}': {}", key, value); + } + } + return e; + } +} diff --git a/tez-api/src/main/java/org/apache/tez/common/TezUtils.java b/tez-api/src/main/java/org/apache/tez/common/TezUtils.java index cb6ff2bae4..ed89f23e7d 100644 --- a/tez-api/src/main/java/org/apache/tez/common/TezUtils.java +++ b/tez-api/src/main/java/org/apache/tez/common/TezUtils.java @@ -26,10 +26,10 @@ import java.util.Map.Entry; import java.util.Objects; +import com.google.common.collect.Iterables; import com.google.protobuf.ByteString; import com.google.protobuf.CodedInputStream; -import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.runtime.api.TaskContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,11 +44,6 @@ import org.xerial.snappy.SnappyInputStream; import org.xerial.snappy.SnappyOutputStream; -import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_MASK; -import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_MASK_DEFAULT; -import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD; -import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT; - /** * Utility methods for setting up a DAG. Has helpers for setting up log4j configuration, converting * {@link org.apache.hadoop.conf.Configuration} to {@link org.apache.tez.dag.api.UserPayload} etc. @@ -57,14 +52,6 @@ public final class TezUtils { private static final Logger LOG = LoggerFactory.getLogger(TezUtils.class); - private static final int PROPERTY_THRESHOLD; - private static final boolean PROPERTY_MASK; - - static { - TezConfiguration c = new TezConfiguration(); - PROPERTY_THRESHOLD = c.getInt(TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD, TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT); - PROPERTY_MASK = c.getBoolean(TEZ_LOGGING_PROPERTY_MASK, TEZ_LOGGING_PROPERTY_MASK_DEFAULT); - } private TezUtils() {} @@ -171,7 +158,7 @@ public static Configuration createConfFromUserPayload(UserPayload payload) throw private static void writeConfInPB(OutputStream dos, Configuration conf) throws IOException { DAGProtos.ConfigurationProto.Builder confProtoBuilder = DAGProtos.ConfigurationProto.newBuilder(); - populateConfProtoFromEntries(conf, confProtoBuilder); + populateConfProtoFromEntries(Iterables.transform(conf, LargePropertyLogger.from(conf)::logEntry), confProtoBuilder); DAGProtos.ConfigurationProto confProto = confProtoBuilder.build(); confProto.writeTo(dos); } @@ -225,19 +212,10 @@ public static void populateConfProtoFromEntries(Iterable PROPERTY_THRESHOLD) { - LOG.warn("Property '{}' is unusually big ({} bytes); large payload may lead to OOM.", key, value.length()); - if (!PROPERTY_MASK) { - LOG.warn("Large property '{}': {}", key, value); - } - } - } } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java b/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java index 0864b82e80..9df2817e2d 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java @@ -35,6 +35,7 @@ import java.util.Stack; import java.util.Objects; +import com.google.common.collect.Iterables; import org.apache.commons.collections4.BidiMap; import org.apache.commons.collections4.bidimap.DualLinkedHashBidiMap; import org.apache.hadoop.classification.InterfaceStability; @@ -42,6 +43,7 @@ import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.tez.client.CallerContext; import org.apache.tez.common.JavaOptsChecker; +import org.apache.tez.common.LargePropertyLogger; import org.apache.tez.common.TezUtils; import org.apache.tez.dag.api.Vertex.VertexExecutionContext; import org.apache.tez.dag.api.records.DAGProtos; @@ -983,7 +985,8 @@ public synchronized DAGPlan createDag(Configuration tezConf, Credentials extraCr if (vertex.getConf()!= null && vertex.getConf().size() > 0) { ConfigurationProto.Builder confBuilder = ConfigurationProto.newBuilder(); - TezUtils.populateConfProtoFromEntries(vertex.getConf().entrySet(), confBuilder); + TezUtils.populateConfProtoFromEntries(Iterables.transform(vertex.getConf().entrySet(), + LargePropertyLogger.from(vertex.getConf())::logEntry), confBuilder); vertexBuilder.setVertexConf(confBuilder); } From 687bd3b22f26288c0f49d40d05588d4f17a7b2fa Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Tue, 24 Oct 2023 13:11:30 +0200 Subject: [PATCH 3/5] Refactor to use native JDK APIs instead of Guava --- ...pertyLogger.java => LargeEntryLogger.java} | 30 ++++++++------ .../java/org/apache/tez/common/TezUtils.java | 41 +++++++++++-------- .../main/java/org/apache/tez/dag/api/DAG.java | 8 +--- 3 files changed, 45 insertions(+), 34 deletions(-) rename tez-api/src/main/java/org/apache/tez/common/{LargePropertyLogger.java => LargeEntryLogger.java} (71%) diff --git a/tez-api/src/main/java/org/apache/tez/common/LargePropertyLogger.java b/tez-api/src/main/java/org/apache/tez/common/LargeEntryLogger.java similarity index 71% rename from tez-api/src/main/java/org/apache/tez/common/LargePropertyLogger.java rename to tez-api/src/main/java/org/apache/tez/common/LargeEntryLogger.java index 32f412b3f7..488156e9a0 100644 --- a/tez-api/src/main/java/org/apache/tez/common/LargePropertyLogger.java +++ b/tez-api/src/main/java/org/apache/tez/common/LargeEntryLogger.java @@ -21,45 +21,51 @@ import org.slf4j.LoggerFactory; import java.util.Map; +import java.util.function.Consumer; import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_MASK; import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_MASK_DEFAULT; import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD; import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT; -public class LargePropertyLogger { - private static final Logger LOG = LoggerFactory.getLogger(LargePropertyLogger.class); +/** + * A configurable logger for large configuration/payload entries. + */ +public class LargeEntryLogger implements Consumer> { + private static final Logger LOG = LoggerFactory.getLogger(LargeEntryLogger.class); private final int threshold; private final boolean mask; - public static LargePropertyLogger from(Configuration c) { - return new LargePropertyLogger( + public static LargeEntryLogger from(Configuration c) { + return new LargeEntryLogger( c.getInt(TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD, TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT), c.getBoolean(TEZ_LOGGING_PROPERTY_MASK, TEZ_LOGGING_PROPERTY_MASK_DEFAULT)); } - public static LargePropertyLogger from(Map c) { + public static LargeEntryLogger from(Map c) { String threshold = c.getOrDefault(TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD, String.valueOf(TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT)); - //TODO Not really 100% equivalent with Conf factory String mask = c.getOrDefault(TEZ_LOGGING_PROPERTY_MASK, String.valueOf(TEZ_LOGGING_PROPERTY_MASK_DEFAULT)); - return new LargePropertyLogger(Integer.parseInt(threshold), Boolean.valueOf(mask)); + return new LargeEntryLogger(Integer.parseInt(threshold), Boolean.parseBoolean(mask)); } - private LargePropertyLogger(int threshold, boolean mask) { + private LargeEntryLogger(int threshold, boolean mask) { this.threshold = threshold; this.mask = mask; } - public Map.Entry logEntry(Map.Entry e) { + public void accept(Map.Entry e) { String key = e.getKey(); String value = e.getValue(); + if (value == null) { + LOG.debug("Skipping entry '{}' cause value is null.", key); + return; + } if (value.length() > threshold) { - LOG.warn("Property '{}' is unusually big ({} bytes); large payload may lead to OOM.", key, value.length()); + LOG.warn("Entry '{}' is unusually big ({} bytes); large entries may lead to OOM.", key, value.length()); if (!mask) { - LOG.warn("Large property '{}': {}", key, value); + LOG.warn("Large entry '{}': {}", key, value); } } - return e; } } diff --git a/tez-api/src/main/java/org/apache/tez/common/TezUtils.java b/tez-api/src/main/java/org/apache/tez/common/TezUtils.java index ed89f23e7d..373d4d9d55 100644 --- a/tez-api/src/main/java/org/apache/tez/common/TezUtils.java +++ b/tez-api/src/main/java/org/apache/tez/common/TezUtils.java @@ -25,8 +25,9 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Objects; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; -import com.google.common.collect.Iterables; import com.google.protobuf.ByteString; import com.google.protobuf.CodedInputStream; @@ -155,10 +156,10 @@ public static Configuration createConfFromUserPayload(UserPayload payload) throw return createConfFromByteString(ByteString.copyFrom(payload.getPayload())); } - private static void writeConfInPB(OutputStream dos, Configuration conf) throws IOException { DAGProtos.ConfigurationProto.Builder confProtoBuilder = DAGProtos.ConfigurationProto.newBuilder(); - populateConfProtoFromEntries(Iterables.transform(conf, LargePropertyLogger.from(conf)::logEntry), confProtoBuilder); + populateConfProtoFromStream(StreamSupport.stream(conf.spliterator(), false).peek(LargeEntryLogger.from(conf)), + confProtoBuilder); DAGProtos.ConfigurationProto confProto = confProtoBuilder.build(); confProto.writeTo(dos); } @@ -200,22 +201,30 @@ public static String convertToHistoryText(Configuration conf) { return convertToHistoryText(null, conf); } + /** + * Copy each entry with non-null value from the specified map to the configuration builder. + *

Implementation detail: For debugging purposes, this method can be configured to log large entries.

+ */ + public static void populateConfProtoFromMap(Map map, + DAGProtos.ConfigurationProto.Builder confBuilder) { + populateConfProtoFromStream(map.entrySet().stream().peek(LargeEntryLogger.from(map)), confBuilder); + } - /* Copy each Map.Entry with non-null value to DAGProtos.ConfigurationProto */ + /** + * Copy each entry with non-null value to the specified configuration builder. + * + * @deprecated Use {@link #populateConfProtoFromMap(Map, DAGProtos.ConfigurationProto.Builder)} instead. + */ + @Deprecated public static void populateConfProtoFromEntries(Iterable> params, DAGProtos.ConfigurationProto.Builder confBuilder) { - for(Map.Entry entry : params) { - String key = entry.getKey(); - String val = entry.getValue(); - if(val != null) { - DAGProtos.PlanKeyValuePair.Builder kvp = DAGProtos.PlanKeyValuePair.newBuilder(); - kvp.setKey(key); - kvp.setValue(val); - confBuilder.addConfKeyValues(kvp); - } else { - LOG.debug("null value for key={}. Skipping.", key); - } - } + populateConfProtoFromStream(StreamSupport.stream(params.spliterator(), false), confBuilder); } + private static void populateConfProtoFromStream(Stream> entries, + DAGProtos.ConfigurationProto.Builder proto) { + entries.filter(e -> e.getValue() != null) + .map(e -> DAGProtos.PlanKeyValuePair.newBuilder().setKey(e.getKey()).setValue(e.getValue()).build()) + .forEach(proto::addConfKeyValues); + } } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java b/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java index 9df2817e2d..f574b25d02 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java @@ -35,7 +35,6 @@ import java.util.Stack; import java.util.Objects; -import com.google.common.collect.Iterables; import org.apache.commons.collections4.BidiMap; import org.apache.commons.collections4.bidimap.DualLinkedHashBidiMap; import org.apache.hadoop.classification.InterfaceStability; @@ -43,7 +42,6 @@ import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.tez.client.CallerContext; import org.apache.tez.common.JavaOptsChecker; -import org.apache.tez.common.LargePropertyLogger; import org.apache.tez.common.TezUtils; import org.apache.tez.dag.api.Vertex.VertexExecutionContext; import org.apache.tez.dag.api.records.DAGProtos; @@ -62,7 +60,6 @@ import org.apache.tez.common.TezYARNUtils; import org.apache.tez.dag.api.EdgeProperty.DataMovementType; import org.apache.tez.dag.api.EdgeProperty.DataSourceType; -import org.apache.tez.dag.api.EdgeProperty.SchedulingType; import org.apache.tez.dag.api.VertexGroup.GroupInfo; import org.apache.tez.dag.api.records.DAGProtos.ConfigurationProto; import org.apache.tez.dag.api.records.DAGProtos.DAGPlan; @@ -985,8 +982,7 @@ public synchronized DAGPlan createDag(Configuration tezConf, Credentials extraCr if (vertex.getConf()!= null && vertex.getConf().size() > 0) { ConfigurationProto.Builder confBuilder = ConfigurationProto.newBuilder(); - TezUtils.populateConfProtoFromEntries(Iterables.transform(vertex.getConf().entrySet(), - LargePropertyLogger.from(vertex.getConf())::logEntry), confBuilder); + TezUtils.populateConfProtoFromMap(vertex.getConf(), confBuilder); vertexBuilder.setVertexConf(confBuilder); } @@ -1087,7 +1083,7 @@ public synchronized DAGPlan createDag(Configuration tezConf, Credentials extraCr ConfigurationProto.Builder confProtoBuilder = ConfigurationProto.newBuilder(); if (!this.dagConf.isEmpty()) { - TezUtils.populateConfProtoFromEntries(this.dagConf.entrySet(), confProtoBuilder); + TezUtils.populateConfProtoFromMap(this.dagConf, confProtoBuilder); } // Copy historyLogLevel from tezConf into dagConf if its not overridden in dagConf. String logLevel = this.dagConf.get(TezConfiguration.TEZ_HISTORY_LOGGING_LOGLEVEL); From d8215af3d6789b6dfad88676dfd0cc19c6612d34 Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Tue, 24 Oct 2023 13:13:45 +0200 Subject: [PATCH 4/5] Add newline between new properties --- .../src/main/java/org/apache/tez/dag/api/TezConfiguration.java | 1 + 1 file changed, 1 insertion(+) diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java index f4cbc82b83..90b478cce2 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java @@ -1589,6 +1589,7 @@ public TezConfiguration(boolean loadDefaults) { public static final String TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD = TEZ_PREFIX + "logging.property.size.threshold"; public static final int TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT = 512 * 1024; + /** * Boolean value. Whether property masking is enabled for logging. Properties may contain sensitive user information * such as passwords, credentials, secrets, etc., so they shouldn't be logged unconditionally. When masking is From 1253d188cd3b32bdcf8ac898f59c4444667e7567 Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Fri, 27 Oct 2023 16:28:46 +0200 Subject: [PATCH 5/5] Add tests for new method and to check logs content --- pom.xml | 1 + .../org/apache/tez/common/TestTezUtils.java | 38 +++++++++++++++++++ .../src/test/resources/log4j.properties | 5 ++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a339d097e9..28f6cc516b 100644 --- a/pom.xml +++ b/pom.xml @@ -927,6 +927,7 @@ ${test.build.data} true ${hadoop.version} + ${project.build.directory} diff --git a/tez-common/src/test/java/org/apache/tez/common/TestTezUtils.java b/tez-common/src/test/java/org/apache/tez/common/TestTezUtils.java index d599cafd76..c046047a55 100644 --- a/tez-common/src/test/java/org/apache/tez/common/TestTezUtils.java +++ b/tez-common/src/test/java/org/apache/tez/common/TestTezUtils.java @@ -24,11 +24,17 @@ import static org.junit.Assert.assertEquals; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.BitSet; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Random; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; import org.apache.hadoop.conf.Configuration; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.UserPayload; @@ -41,6 +47,7 @@ import com.google.protobuf.ByteString; public class TestTezUtils { + private static final Path LOG_PATH = Paths.get(System.getProperty("project.build.directory"), "tez-test.log"); @Test (timeout=2000) public void testByteStringToAndFromConf() throws IOException { @@ -103,6 +110,18 @@ public void testByteStringAddToLargeConf() throws IOException { Assert.assertEquals(conf.get("testLargeValue"), largeValue); } + @Test public void testByteStringFromConfEmitsLogForLargeEntry() throws IOException { + Configuration conf = new Configuration(false); + conf.set(TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD, "10"); + conf.set(TezConfiguration.TEZ_LOGGING_PROPERTY_MASK, "false"); + conf.set("tez.fake.property.id00", "ABCDEFGHIJK"); + TezUtils.createByteStringFromConf(conf); + List logLines = Lists.reverse(Files.readAllLines(LOG_PATH)); + assertEquals("Entry 'tez.fake.property.id00' is unusually big (11 bytes); large entries may lead to OOM.", + logLines.get(1)); + assertEquals("Large entry 'tez.fake.property.id00': ABCDEFGHIJK", logLines.get(0)); + } + @Test (timeout=2000) public void testPayloadToAndFromConf() throws IOException { Configuration conf = getConf(); @@ -294,4 +313,23 @@ public void testPopulateConfProtoFromEntries() { assertEquals(confBuilder.getConfKeyValuesList().size(), 1); } + @Test public void testPopulateConfProtoFromMapSetsKeyValuePair() { + DAGProtos.ConfigurationProto.Builder builder = DAGProtos.ConfigurationProto.newBuilder(); + TezUtils.populateConfProtoFromMap(ImmutableMap.of("tez.fake.property", "someValue"), builder); + assertEquals(builder.getConfKeyValues(0).getKey(), "tez.fake.property"); + assertEquals(builder.getConfKeyValues(0).getValue(), "someValue"); + } + + @Test public void testPopulateConfProtoFromMapEmitsLogForLargeEntry() throws IOException { + Map map = new HashMap<>(); + map.put(TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD, "10"); + map.put(TezConfiguration.TEZ_LOGGING_PROPERTY_MASK, "false"); + map.put("tez.fake.property.id01", "ABCDEFGHIJK"); + TezUtils.populateConfProtoFromMap(map, DAGProtos.ConfigurationProto.newBuilder()); + List logLines = Lists.reverse(Files.readAllLines(LOG_PATH)); + assertEquals("Entry 'tez.fake.property.id01' is unusually big (11 bytes); large entries may lead to OOM.", + logLines.get(1)); + assertEquals("Large entry 'tez.fake.property.id01': ABCDEFGHIJK", logLines.get(0)); + } + } diff --git a/tez-common/src/test/resources/log4j.properties b/tez-common/src/test/resources/log4j.properties index 531b68b5a9..3ab859a12c 100644 --- a/tez-common/src/test/resources/log4j.properties +++ b/tez-common/src/test/resources/log4j.properties @@ -12,8 +12,11 @@ # log4j configuration used during build and unit tests -log4j.rootLogger=info,stdout +log4j.rootLogger=info,stdout,r log4j.threshhold=ALL log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} (%F:%M(%L)) - %m%n +log4j.appender.r=org.apache.log4j.RollingFileAppender +log4j.appender.r.File=${project.build.directory}/tez-test.log +log4j.appender.r.layout=org.apache.log4j.PatternLayout