Skip to content

Commit a72b963

Browse files
MaxGekkholdenk
authored andcommitted
[SPARK-26191][SQL] Control truncation of Spark plans via maxFields parameter
## What changes were proposed in this pull request? In the PR, I propose to add `maxFields` parameter to all functions involved in creation of textual representation of spark plans such as `simpleString` and `verboseString`. New parameter restricts number of fields converted to truncated strings. Any elements beyond the limit will be dropped and replaced by a `"... N more fields"` placeholder. The threshold is bumped up to `Int.MaxValue` for `toFile()`. ## How was this patch tested? Added a test to `QueryExecutionSuite` which checks `maxFields` impacts on number of truncated fields in `LocalRelation`. Closes apache#23159 from MaxGekk/to-file-max-fields. Lead-authored-by: Maxim Gekk <max.gekk@gmail.com> Co-authored-by: Maxim Gekk <maxim.gekk@databricks.com> Signed-off-by: Herman van Hovell <hvanhovell@databricks.com>
1 parent 46913ce commit a72b963

43 files changed

Lines changed: 203 additions & 126 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,7 @@ class Analyzer(
979979
a.mapExpressions(resolveExpressionTopDown(_, appendColumns))
980980

981981
case q: LogicalPlan =>
982-
logTrace(s"Attempting to resolve ${q.simpleString}")
982+
logTrace(s"Attempting to resolve ${q.simpleString(SQLConf.get.maxToStringFields)}")
983983
q.mapExpressions(resolveExpressionTopDown(_, q))
984984
}
985985

@@ -1777,7 +1777,7 @@ class Analyzer(
17771777

17781778
case p if p.expressions.exists(hasGenerator) =>
17791779
throw new AnalysisException("Generators are not supported outside the SELECT clause, but " +
1780-
"got: " + p.simpleString)
1780+
"got: " + p.simpleString(SQLConf.get.maxToStringFields))
17811781
}
17821782
}
17831783

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
2525
import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification
2626
import org.apache.spark.sql.catalyst.plans._
2727
import org.apache.spark.sql.catalyst.plans.logical._
28+
import org.apache.spark.sql.internal.SQLConf
2829
import org.apache.spark.sql.types._
2930

3031
/**
@@ -303,7 +304,7 @@ trait CheckAnalysis extends PredicateHelper {
303304
val missingAttributes = o.missingInput.mkString(",")
304305
val input = o.inputSet.mkString(",")
305306
val msgForMissingAttributes = s"Resolved attribute(s) $missingAttributes missing " +
306-
s"from $input in operator ${operator.simpleString}."
307+
s"from $input in operator ${operator.simpleString(SQLConf.get.maxToStringFields)}."
307308

308309
val resolver = plan.conf.resolver
309310
val attrsWithSameName = o.missingInput.filter { missing =>
@@ -368,7 +369,7 @@ trait CheckAnalysis extends PredicateHelper {
368369
s"""nondeterministic expressions are only allowed in
369370
|Project, Filter, Aggregate or Window, found:
370371
| ${o.expressions.map(_.sql).mkString(",")}
371-
|in operator ${operator.simpleString}
372+
|in operator ${operator.simpleString(SQLConf.get.maxToStringFields)}
372373
""".stripMargin)
373374

374375
case _: UnresolvedHint =>
@@ -380,7 +381,8 @@ trait CheckAnalysis extends PredicateHelper {
380381
}
381382
extendedCheckRules.foreach(_(plan))
382383
plan.foreachUp {
383-
case o if !o.resolved => failAnalysis(s"unresolved operator ${o.simpleString}")
384+
case o if !o.resolved =>
385+
failAnalysis(s"unresolved operator ${o.simpleString(SQLConf.get.maxToStringFields)}")
384386
case _ =>
385387
}
386388

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,8 +1069,8 @@ trait TypeCoercionRule extends Rule[LogicalPlan] with Logging {
10691069
// Leave the same if the dataTypes match.
10701070
case Some(newType) if a.dataType == newType.dataType => a
10711071
case Some(newType) =>
1072-
logDebug(
1073-
s"Promoting $a from ${a.dataType} to ${newType.dataType} in ${q.simpleString}")
1072+
logDebug(s"Promoting $a from ${a.dataType} to ${newType.dataType} in " +
1073+
s" ${q.simpleString(SQLConf.get.maxToStringFields)}")
10741074
newType
10751075
}
10761076
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{GenerateSafeProjection
2828
import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, InitializeJavaBean, Invoke, NewInstance}
2929
import org.apache.spark.sql.catalyst.optimizer.SimplifyCasts
3030
import org.apache.spark.sql.catalyst.plans.logical.{CatalystSerde, DeserializeToObject, LocalRelation}
31+
import org.apache.spark.sql.internal.SQLConf
3132
import org.apache.spark.sql.types.{ObjectType, StringType, StructField, StructType}
3233
import org.apache.spark.unsafe.types.UTF8String
3334
import org.apache.spark.util.Utils
@@ -323,8 +324,8 @@ case class ExpressionEncoder[T](
323324
extractProjection(inputRow)
324325
} catch {
325326
case e: Exception =>
326-
throw new RuntimeException(
327-
s"Error while encoding: $e\n${serializer.map(_.simpleString).mkString("\n")}", e)
327+
throw new RuntimeException(s"Error while encoding: $e\n" +
328+
s"${serializer.map(_.simpleString(SQLConf.get.maxToStringFields)).mkString("\n")}", e)
328329
}
329330

330331
/**
@@ -336,7 +337,8 @@ case class ExpressionEncoder[T](
336337
constructProjection(row).get(0, ObjectType(clsTag.runtimeClass)).asInstanceOf[T]
337338
} catch {
338339
case e: Exception =>
339-
throw new RuntimeException(s"Error while decoding: $e\n${deserializer.simpleString}", e)
340+
throw new RuntimeException(s"Error while decoding: $e\n" +
341+
s"${deserializer.simpleString(SQLConf.get.maxToStringFields)}", e)
340342
}
341343

342344
/**

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,12 +259,12 @@ abstract class Expression extends TreeNode[Expression] {
259259

260260
// Marks this as final, Expression.verboseString should never be called, and thus shouldn't be
261261
// overridden by concrete classes.
262-
final override def verboseString: String = simpleString
262+
final override def verboseString(maxFields: Int): String = simpleString(maxFields)
263263

264-
override def simpleString: String = toString
264+
override def simpleString(maxFields: Int): String = toString
265265

266266
override def toString: String = prettyName + truncatedString(
267-
flatArguments.toSeq, "(", ", ", ")")
267+
flatArguments.toSeq, "(", ", ", ")", SQLConf.get.maxToStringFields)
268268

269269
/**
270270
* Returns SQL representation of this expression. For expressions extending [[NonSQLExpression]],

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ trait Block extends TreeNode[Block] with JavaCode {
197197
case _ => code"$this\n$other"
198198
}
199199

200-
override def verboseString: String = toString
200+
override def verboseString(maxFields: Int): String = toString
201201
}
202202

203203
object Block {

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
2525
import org.apache.spark.sql.catalyst.expressions.codegen._
2626
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
2727
import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
28+
import org.apache.spark.sql.internal.SQLConf
2829
import org.apache.spark.sql.types._
2930

3031
/**
@@ -101,7 +102,7 @@ case class UserDefinedGenerator(
101102
inputRow = new InterpretedProjection(children)
102103
convertToScala = {
103104
val inputSchema = StructType(children.map { e =>
104-
StructField(e.simpleString, e.dataType, nullable = true)
105+
StructField(e.simpleString(SQLConf.get.maxToStringFields), e.dataType, nullable = true)
105106
})
106107
CatalystTypeConverters.createToScalaConverter(inputSchema)
107108
}.asInstanceOf[InternalRow => Row]

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ case class NamedLambdaVariable(
7676

7777
override def toString: String = s"lambda $name#${exprId.id}$typeSuffix"
7878

79-
override def simpleString: String = s"lambda $name#${exprId.id}: ${dataType.simpleString}"
79+
override def simpleString(maxFields: Int): String = {
80+
s"lambda $name#${exprId.id}: ${dataType.simpleString(maxFields)}"
81+
}
8082
}
8183

8284
/**

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.InternalRow
2323
import org.apache.spark.sql.catalyst.expressions.codegen._
2424
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
2525
import org.apache.spark.sql.catalyst.util.RandomUUIDGenerator
26+
import org.apache.spark.sql.internal.SQLConf
2627
import org.apache.spark.sql.types._
2728
import org.apache.spark.unsafe.types.UTF8String
2829

@@ -40,7 +41,7 @@ case class PrintToStderr(child: Expression) extends UnaryExpression {
4041
input
4142
}
4243

43-
private val outputPrefix = s"Result of ${child.simpleString} is "
44+
private val outputPrefix = s"Result of ${child.simpleString(SQLConf.get.maxToStringFields)} is "
4445

4546
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
4647
val outputPrefixField = ctx.addReferenceObj("outputPrefix", outputPrefix)
@@ -72,7 +73,7 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa
7273

7374
override def prettyName: String = "assert_true"
7475

75-
private val errMsg = s"'${child.simpleString}' is not true!"
76+
private val errMsg = s"'${child.simpleString(SQLConf.get.maxToStringFields)}' is not true!"
7677

7778
override def eval(input: InternalRow) : Any = {
7879
val v = child.eval(input)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,9 @@ case class AttributeReference(
330330

331331
// Since the expression id is not in the first constructor it is missing from the default
332332
// tree string.
333-
override def simpleString: String = s"$name#${exprId.id}: ${dataType.simpleString}"
333+
override def simpleString(maxFields: Int): String = {
334+
s"$name#${exprId.id}: ${dataType.simpleString(maxFields)}"
335+
}
334336

335337
override def sql: String = {
336338
val qualifierPrefix = if (qualifier.nonEmpty) qualifier.mkString(".") + "." else ""

0 commit comments

Comments
 (0)