diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 7f10bdbc80ca..485015f2efab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -141,17 +141,45 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB errorClass, missingCol, orderedCandidates, a.origin) } + private def checkUnreferencedCTERelations( + cteMap: mutable.Map[Long, (CTERelationDef, Int, mutable.Map[Long, Int])], + visited: mutable.Map[Long, Boolean], + danglingCTERelations: mutable.ArrayBuffer[CTERelationDef], + cteId: Long): Unit = { + if (visited(cteId)) { + return + } + val (cteDef, _, refMap) = cteMap(cteId) + refMap.foreach { case (id, _) => + checkUnreferencedCTERelations(cteMap, visited, danglingCTERelations, id) + } + danglingCTERelations.append(cteDef) + visited(cteId) = true + } + def checkAnalysis(plan: LogicalPlan): Unit = { val inlineCTE = InlineCTE(alwaysInline = true) val cteMap = mutable.HashMap.empty[Long, (CTERelationDef, Int, mutable.Map[Long, Int])] inlineCTE.buildCTEMap(plan, cteMap) - cteMap.values.foreach { case (relation, refCount, _) => - // If a CTE relation is never used, it will disappear after inline. Here we explicitly check - // analysis for it, to make sure the entire query plan is valid. - if (refCount == 0) checkAnalysis0(relation.child) + val danglingCTERelations = mutable.ArrayBuffer.empty[CTERelationDef] + val visited: mutable.Map[Long, Boolean] = mutable.Map.empty.withDefaultValue(false) + // If a CTE relation is never used, it will disappear after inline. Here we explicitly collect + // these dangling CTE relations, and put them back in the main query, to make sure the entire + // query plan is valid. + cteMap.foreach { case (cteId, (_, refCount, _)) => + // If a CTE relation ref count is 0, the other CTE relations that reference it should also be + // collected. This code will also guarantee the leaf relations that do not reference + // any others are collected first. + if (refCount == 0) { + checkUnreferencedCTERelations(cteMap, visited, danglingCTERelations, cteId) + } } // Inline all CTEs in the plan to help check query plan structures in subqueries. - checkAnalysis0(inlineCTE(plan)) + var inlinedPlan: LogicalPlan = inlineCTE(plan) + if (danglingCTERelations.nonEmpty) { + inlinedPlan = WithCTE(inlinedPlan, danglingCTERelations.toSeq) + } + checkAnalysis0(inlinedPlan) plan.setAnalyzed() }