-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-34923][SQL] Metadata output should be empty for more plans #32017
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
6b4cba3
6c3dde2
ce3ac0e
a2d72ef
a4a7d05
5a04a7e
73219d4
b1c0183
e8e6e7d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -61,4 +61,6 @@ case class EventTimeWatermark( | |
| a | ||
| } | ||
| } | ||
|
|
||
| override val metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,7 +31,7 @@ import org.apache.spark.sql.types._ | |
| import org.apache.spark.util.random.RandomSampler | ||
|
|
||
| /** | ||
| * When planning take() or collect() operations, this special node that is inserted at the top of | ||
| * When planning take() or collect() operations, this special node is inserted at the top of | ||
| * the logical plan before invoking the query planner. | ||
| * | ||
| * Rules can pattern-match on this node in order to apply transformations that only take effect | ||
|
|
@@ -40,6 +40,7 @@ import org.apache.spark.util.random.RandomSampler | |
| case class ReturnAnswer(child: LogicalPlan) extends UnaryNode { | ||
| override def maxRows: Option[Long] = child.maxRows | ||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -51,6 +52,7 @@ case class ReturnAnswer(child: LogicalPlan) extends UnaryNode { | |
| */ | ||
| case class Subquery(child: LogicalPlan, correlated: Boolean) extends OrderPreservingUnaryNode { | ||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| } | ||
|
|
||
| object Subquery { | ||
|
|
@@ -134,11 +136,13 @@ case class Generate( | |
| } | ||
|
|
||
| def output: Seq[Attribute] = requiredChildOutput ++ qualifiedGeneratorOutput | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| } | ||
|
|
||
| case class Filter(condition: Expression, child: LogicalPlan) | ||
| extends OrderPreservingUnaryNode with PredicateHelper { | ||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
|
|
||
| override def maxRows: Option[Long] = child.maxRows | ||
|
|
||
|
|
@@ -364,6 +368,17 @@ case class Join( | |
| } | ||
| } | ||
|
|
||
| override def metadataOutput: Seq[Attribute] = { | ||
| joinType match { | ||
| case ExistenceJoin(_) => | ||
| left.metadataOutput | ||
| case LeftExistence(_) => | ||
| left.metadataOutput | ||
| case _ => | ||
| children.flatMap(_.metadataOutput) | ||
| } | ||
| } | ||
|
|
||
| override protected lazy val validConstraints: ExpressionSet = { | ||
| joinType match { | ||
| case _: InnerLike if condition.isDefined => | ||
|
|
@@ -520,6 +535,8 @@ object View { | |
| case class With(child: LogicalPlan, cteRelations: Seq[(String, SubqueryAlias)]) extends UnaryNode { | ||
| override def output: Seq[Attribute] = child.output | ||
|
|
||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
|
|
||
| override def simpleString(maxFields: Int): String = { | ||
| val cteAliases = truncatedString(cteRelations.map(_._1), "[", ", ", "]", maxFields) | ||
| s"CTE $cteAliases" | ||
|
|
@@ -532,6 +549,7 @@ case class WithWindowDefinition( | |
| windowDefinitions: Map[String, WindowSpecDefinition], | ||
| child: LogicalPlan) extends UnaryNode { | ||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -545,6 +563,7 @@ case class Sort( | |
| global: Boolean, | ||
| child: LogicalPlan) extends UnaryNode { | ||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| override def maxRows: Option[Long] = child.maxRows | ||
| override def outputOrdering: Seq[SortOrder] = order | ||
| } | ||
|
|
@@ -669,6 +688,7 @@ case class Window( | |
| override def maxRows: Option[Long] = child.maxRows | ||
| override def output: Seq[Attribute] = | ||
| child.output ++ windowExpressions.map(_.toAttribute) | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
|
|
||
| override def producedAttributes: AttributeSet = windowOutputSet | ||
|
|
||
|
|
@@ -861,6 +881,7 @@ object Limit { | |
| */ | ||
| case class GlobalLimit(limitExpr: Expression, child: LogicalPlan) extends OrderPreservingUnaryNode { | ||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| override def maxRows: Option[Long] = { | ||
| limitExpr match { | ||
| case IntegerLiteral(limit) => Some(limit) | ||
|
|
@@ -877,6 +898,7 @@ case class GlobalLimit(limitExpr: Expression, child: LogicalPlan) extends OrderP | |
| */ | ||
| case class LocalLimit(limitExpr: Expression, child: LogicalPlan) extends OrderPreservingUnaryNode { | ||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
|
|
||
| override def maxRowsPerPartition: Option[Long] = { | ||
| limitExpr match { | ||
|
|
@@ -898,6 +920,7 @@ case class LocalLimit(limitExpr: Expression, child: LogicalPlan) extends OrderPr | |
| */ | ||
| case class Tail(limitExpr: Expression, child: LogicalPlan) extends OrderPreservingUnaryNode { | ||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| override def maxRows: Option[Long] = { | ||
| limitExpr match { | ||
| case IntegerLiteral(limit) => Some(limit) | ||
|
|
@@ -924,11 +947,6 @@ case class SubqueryAlias( | |
| child.output.map(_.withQualifier(qualifierList)) | ||
| } | ||
|
|
||
| override def metadataOutput: Seq[Attribute] = { | ||
| val qualifierList = identifier.qualifier :+ alias | ||
| child.metadataOutput.map(_.withQualifier(qualifierList)) | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is the logic here removed? Won't this cause resolution failures when referencing a metadata column via an alias? Like [inline example not preserved in this capture]
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @cloud-fan, should we support this case if it requires changing the query during analysis after being resolved?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we should only expose the metadata column in a single SELECT group, e.g. [inline example not preserved]. It's super weird if we can propagate the metadata column through SELECT groups, e.g. [inline example not preserved]. However, I do agree with @rdblue that a simple alias should be supported. For example, [inline example not preserved]. That said, let's propagate metadata columns in [operator name not preserved] |
||
|
|
||
| override def doCanonicalize(): LogicalPlan = child.canonicalized | ||
| } | ||
|
|
||
|
|
@@ -983,6 +1001,7 @@ case class Sample( | |
|
|
||
| override def maxRows: Option[Long] = child.maxRows | ||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -991,6 +1010,7 @@ case class Sample( | |
| case class Distinct(child: LogicalPlan) extends UnaryNode { | ||
| override def maxRows: Option[Long] = child.maxRows | ||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -1001,6 +1021,7 @@ abstract class RepartitionOperation extends UnaryNode { | |
| def numPartitions: Int | ||
| override final def maxRows: Option[Long] = child.maxRows | ||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| def partitioning: Partitioning | ||
| } | ||
|
|
||
|
|
@@ -1095,6 +1116,7 @@ case class Deduplicate( | |
| child: LogicalPlan) extends UnaryNode { | ||
| override def maxRows: Option[Long] = child.maxRows | ||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -1123,4 +1145,5 @@ case class CollectMetrics( | |
| } | ||
|
|
||
| override def output: Seq[Attribute] = child.output | ||
| override def metadataOutput: Seq[Attribute] = child.metadataOutput | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: including unrelated changes tends to cause git conflicts.