apache · nblintao · Jul 12, 2016 · Jul 13, 2016 · Jul 15, 2016 · May 30, 2017
diff --git a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala
@@ -99,4 +99,9 @@ private[spark] object ToolTips {
        dynamic allocation is enabled. The number of granted executors may exceed the limit
        ephemerally when executors are being killed.
     """
+
+  // Tool tip of the "SQL Text" column header in the SQL tab's execution tables.
+  val SQL_TEXT =
+    "Shows 140 characters by default. Click \"+more\" to see more. Long texts are truncated to " +
+      "1000 characters. Left blank when the query was not issued by SQL."
 }
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -296,7 +296,9 @@ private[spark] object UIUtils extends Logging {
       id: Option[String] = None,
       headerClasses: Seq[String] = Seq.empty,
       stripeRowsWithCss: Boolean = true,
-      sortable: Boolean = true): Seq[Node] = {
+      sortable: Boolean = true,
+      // If the tool tip is defined, Some(toolTipText, toolTipPosition), otherwise None.
+      headerToolTips: Seq[Option[(String, String)]] = Seq.empty): Seq[Node] = {
 
     val listingTableClass = {
       val _tableClass = if (stripeRowsWithCss) TABLE_CLASS_STRIPED else TABLE_CLASS_NOT_STRIPED
@@ -317,6 +319,14 @@ private[spark] object UIUtils extends Logging {
       }
     }
 
+    /**
+     * Returns the tool tip (text, position) for the header at `index`, if one is defined.
+     * `headerToolTips` may be shorter than `headers` (it defaults to empty), so `lift` is
+     * used to treat every index outside its bounds as "no tool tip" instead of throwing.
+     */
+    def getToolTip(index: Int): Option[(String, String)] =
+      headerToolTips.lift(index).flatten
+
     val newlinesInHeader = headers.exists(_.contains("\n"))
     def getHeaderContent(header: String): Seq[Node] = {
       if (newlinesInHeader) {
@@ -330,7 +340,16 @@ private[spark] object UIUtils extends Logging {
 
     val headerRow: Seq[Node] = {
       headers.view.zipWithIndex.map { x =>
-        <th width={colWidthAttr} class={getClass(x._2)}>{getHeaderContent(x._1)}</th>
+        // Only headers that define a tool tip get the tooltip attributes.
+        getToolTip(x._2) match {
+          case None =>
+            <th width={colWidthAttr} class={getClass(x._2)}>{getHeaderContent(x._1)}</th>
+          case Some((text, position)) =>
+            // scalastyle:off line.size.limit
+            <th width={colWidthAttr} class={getClass(x._2)} data-toggle="tooltip" title={text} data-placement={position}>{getHeaderContent(x._1)}</th>
+            // scalastyle:on line.size.limit
+        }
+
       }
     }
     <table class={listingTableClass} id={id.map(Text.apply)}>

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -65,13 +65,29 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
   }
 
   /** Creates LogicalPlan for a given SQL string. */
-  override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser =>
-    astBuilder.visitSingleStatement(parser.singleStatement()) match {
-      case plan: LogicalPlan => plan
-      case _ =>
-        val position = Origin(None, None)
-        throw new ParseException(Option(sqlText), "Unsupported SQL statement", position, position)
+  override def parsePlan(sqlText: String): LogicalPlan = {
+    val logicalPlan = parse(sqlText) { parser =>
+      astBuilder.visitSingleStatement(parser.singleStatement()) match {
+        case plan: LogicalPlan => plan
+        case _ =>
+          val position = Origin(None, None)
+          throw new ParseException(Option(sqlText), "Unsupported SQL statement", position, position)
+      }
+    }
+    // Record the original SQL text in the top logical plan so that the web UI can display it.
+    // Truncate it so very long queries cannot crash browsers or the web UI by exhausting memory.
+    val maxLength = 1000
+    val suffix = " ... (truncated)"
+    val truncateLength = maxLength - suffix.length
+    val truncatedSqlText = {
+      if (sqlText.length <= maxLength) {
+        sqlText
+      } else {
+        sqlText.substring(0, truncateLength) + suffix
+      }
     }
+    logicalPlan.sqlText = Some(truncatedSqlText)
+    logicalPlan
   }
 
   /** Get the builder (visitor) which converts a ParseTree into an AST. */

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -258,6 +258,9 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging {
    * Refreshes (or invalidates) any metadata/data cached in the plan recursively.
    */
   def refresh(): Unit = children.foreach(_.refresh())
+
+  // The original SQL text (truncated if long), kept on the top logical plan for the web UI.
+  var sqlText: Option[String] = None
 }
 
 /**

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2945,7 +2945,10 @@ class Dataset[T] private[sql](
 
   /** A convenient function to wrap a logical plan and produce a Dataset. */
   @inline private def withTypedPlan[U : Encoder](logicalPlan: => LogicalPlan): Dataset[U] = {
-    Dataset(sparkSession, logicalPlan)
+    // Carry the original SQL text over to the derived Dataset so the web UI can still show it.
+    val typedDataset: Dataset[U] = Dataset(sparkSession, logicalPlan)
+    typedDataset.logicalPlan.sqlText = queryExecution.logical.sqlText
+    typedDataset
   }
 
   /** A convenient function to wrap a set based logical plan and produce a Dataset. */

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala
@@ -60,7 +60,8 @@ object SQLExecution {
 
         sparkSession.sparkContext.listenerBus.post(SparkListenerSQLExecutionStart(
           executionId, callSite.shortForm, callSite.longForm, queryExecution.toString,
-          SparkPlanInfo.fromSparkPlan(queryExecution.executedPlan), System.currentTimeMillis()))
+          SparkPlanInfo.fromSparkPlan(queryExecution.executedPlan), queryExecution.logical.sqlText,
+          System.currentTimeMillis()))
         try {
           body
         } finally {

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala
@@ -25,7 +25,7 @@ import scala.xml.Node
 import org.apache.commons.lang3.StringEscapeUtils
 
 import org.apache.spark.internal.Logging
-import org.apache.spark.ui.{UIUtils, WebUIPage}
+import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage}
 
 private[ui] class AllExecutionsPage(parent: SQLTab) extends WebUIPage("") with Logging {
 
@@ -60,6 +60,10 @@ private[ui] class AllExecutionsPage(parent: SQLTab) extends WebUIPage("") with L
         function clickDetail(details) {{
           details.parentNode.querySelector('.stage-details').classList.toggle('collapsed')
         }}
+        function clickMore(details) {{
+          details.parentNode.querySelector('.sql-abstract').classList.toggle('collapsed')
+          details.parentNode.querySelector('.sql-full').classList.toggle('collapsed')
+        }}
       </script>
     UIUtils.headerSparkPage("SQL", content, parent, Some(5000))
   }
@@ -83,10 +87,13 @@ private[ui] abstract class ExecutionTable(
 
   protected def header: Seq[String]
 
-  protected def row(currentTime: Long, executionUIData: SQLExecutionUIData): Seq[Node] = {
+  protected def row(currentTime: Long, executionUIData: SQLExecutionUIData, showSqlText: Boolean)
+    : Seq[Node] = {
     val submissionTime = executionUIData.submissionTime
     val duration = executionUIData.completionTime.getOrElse(currentTime) - submissionTime
 
+    val sqlText = executionUIData.sqlText.getOrElse("")
+
     val runningJobs = executionUIData.runningJobs.map { jobId =>
       <a href={jobURL(jobId)}>{jobId.toString}</a><br/>
     }
@@ -124,6 +131,11 @@ private[ui] abstract class ExecutionTable(
           {failedJobs}
         </td>
       }}
+      {if (showSqlText) {
+        <td>
+          {sqlTextCell(sqlText)}
+        </td>
+      }}
     </tr>
   }
 
@@ -146,11 +158,43 @@ private[ui] abstract class ExecutionTable(
     <div>{desc} {details}</div>
   }
 
+  // Renders the "SQL Text" cell: texts longer than 140 characters are collapsed to an abstract
+  // with a "+more" control; clickMore swaps the abstract and the full text.
+  private def sqlTextCell(sqlText: String): Seq[Node] = {
+    val abstractLength = 140
+    if (sqlText.length > abstractLength) {
+      val abstractText = sqlText.take(abstractLength) + " ..."
+      <div>
+        <div class="stage-details sql-abstract">
+          {abstractText}
+        </div>
+        <div class="stage-details sql-full collapsed">
+          {sqlText}
+        </div>
+        <span onclick="clickMore(this)" class="expand-details">
+          +more
+        </span>
+      </div>
+    } else {
+      <div>{sqlText}</div>
+    }
+  }
+
   def toNodeSeq: Seq[Node] = {
+    // The "SQL Text" column is added only when at least one execution carries SQL text.
+    val showSqlText = executionUIDatas.exists(_.sqlText.isDefined)
+    val sqlTextHeader = if (showSqlText) Seq("SQL Text") else Seq.empty
+    val headerFull = header ++ sqlTextHeader
+    // Columns from `header` have no tool tip; only the optional "SQL Text" column does.
+    val noToolTips: Seq[Option[(String, String)]] = header.map(_ => None)
+    val headerToolTips =
+      if (showSqlText) noToolTips :+ Some((ToolTips.SQL_TEXT, "top")) else noToolTips
+
     <div>
       <h4>{tableName}</h4>
       {UIUtils.listingTable[SQLExecutionUIData](
-        header, row(currentTime, _), executionUIDatas, id = Some(tableId))}
+        headerFull, row(currentTime, _, showSqlText), executionUIDatas, id = Some(tableId),
+        headerToolTips = headerToolTips)}
     </div>
   }
 

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala
@@ -40,6 +40,7 @@ case class SparkListenerSQLExecutionStart(
     details: String,
     physicalPlanDescription: String,
     sparkPlanInfo: SparkPlanInfo,
+    sqlText: Option[String],
     time: Long)
   extends SparkListenerEvent
 
@@ -268,7 +269,7 @@ class SQLListener(conf: SparkConf) extends SparkListener with Logging {
 
   override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
     case SparkListenerSQLExecutionStart(executionId, description, details,
-      physicalPlanDescription, sparkPlanInfo, time) =>
+      physicalPlanDescription, sparkPlanInfo, sqlText, time) =>
       val physicalPlanGraph = SparkPlanGraph(sparkPlanInfo)
       val sqlPlanMetrics = physicalPlanGraph.allNodes.flatMap { node =>
         node.metrics.map(metric => metric.accumulatorId -> metric)
@@ -280,6 +281,7 @@ class SQLListener(conf: SparkConf) extends SparkListener with Logging {
         physicalPlanDescription,
         physicalPlanGraph,
         sqlPlanMetrics.toMap,
+        sqlText,
         time)
       synchronized {
         activeExecutions(executionId) = executionUIData
@@ -428,6 +430,7 @@ private[ui] class SQLExecutionUIData(
     val physicalPlanDescription: String,
     val physicalPlanGraph: SparkPlanGraph,
     val accumulatorMetrics: Map[Long, SQLPlanMetric],
+    val sqlText: Option[String],
     val submissionTime: Long) {
 
   var completionTime: Option[Long] = None

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLListenerSuite.scala
@@ -123,6 +123,7 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest
       "test",
       df.queryExecution.toString,
       SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan),
+      None,
       System.currentTimeMillis()))
 
     val executionUIData = listener.executionIdToData(0)
@@ -259,6 +260,7 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest
       "test",
       df.queryExecution.toString,
       SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan),
+      None,
       System.currentTimeMillis()))
     listener.onJobStart(SparkListenerJobStart(
       jobId = 0,
@@ -289,6 +291,7 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest
       "test",
       df.queryExecution.toString,
       SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan),
+      None,
       System.currentTimeMillis()))
     listener.onJobStart(SparkListenerJobStart(
       jobId = 0,
@@ -330,6 +333,7 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest
       "test",
       df.queryExecution.toString,
       SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan),
+      None,
       System.currentTimeMillis()))
     listener.onJobStart(SparkListenerJobStart(
       jobId = 0,
@@ -369,7 +373,7 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest
     // These are largely just boilerplate unrelated to what we're trying to test.
     val df = createTestDataFrame
     val executionStart = SparkListenerSQLExecutionStart(
-      0, "", "", "", SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan), 0)
+      0, "", "", "", SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan), None, 0)
     val stageInfo = createStageInfo(0, 0)
     val jobStart = SparkListenerJobStart(0, 0, Seq(stageInfo), createProperties(0))
     val stageSubmitted = SparkListenerStageSubmitted(stageInfo)