Skip to content

Commit a0f1a11

Browse files
committed
[SPARK-11981][SQL] Move implementations of methods back to DataFrame from Queryable
Also added `show` methods to `Dataset`. Author: Reynold Xin <rxin@databricks.com>. Closes #9964 from rxin/SPARK-11981.
1 parent 2610e06 commit a0f1a11

3 files changed

Lines changed: 111 additions & 33 deletions

File tree

sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._
3737
import org.apache.spark.sql.catalyst.plans.logical._
3838
import org.apache.spark.sql.catalyst.plans.{Inner, JoinType}
3939
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, SqlParser}
40-
import org.apache.spark.sql.execution.{EvaluatePython, FileRelation, LogicalRDD, QueryExecution, Queryable, SQLExecution}
40+
import org.apache.spark.sql.execution.{EvaluatePython, ExplainCommand, FileRelation, LogicalRDD, QueryExecution, Queryable, SQLExecution}
4141
import org.apache.spark.sql.execution.datasources.{CreateTableUsingAsSelect, LogicalRelation}
4242
import org.apache.spark.sql.execution.datasources.json.JacksonGenerator
4343
import org.apache.spark.sql.sources.HadoopFsRelation
@@ -112,8 +112,8 @@ private[sql] object DataFrame {
112112
*/
113113
@Experimental
114114
class DataFrame private[sql](
115-
@transient val sqlContext: SQLContext,
116-
@DeveloperApi @transient val queryExecution: QueryExecution)
115+
@transient override val sqlContext: SQLContext,
116+
@DeveloperApi @transient override val queryExecution: QueryExecution)
117117
extends Queryable with Serializable {
118118

119119
// Note for Spark contributors: if adding or updating any action in `DataFrame`, please make sure
@@ -282,6 +282,35 @@ class DataFrame private[sql](
282282
*/
283283
def schema: StructType = queryExecution.analyzed.schema
284284

285+
/**
286+
* Prints the schema to the console in a nice tree format.
287+
* @group basic
288+
* @since 1.3.0
289+
*/
290+
// scalastyle:off println
291+
override def printSchema(): Unit = println(schema.treeString)
292+
// scalastyle:on println
293+
294+
/**
295+
* Prints the plans (logical and physical) to the console for debugging purposes.
296+
* @group basic
297+
* @since 1.3.0
298+
*/
299+
override def explain(extended: Boolean): Unit = {
300+
val explain = ExplainCommand(queryExecution.logical, extended = extended)
301+
sqlContext.executePlan(explain).executedPlan.executeCollect().foreach {
302+
// scalastyle:off println
303+
r => println(r.getString(0))
304+
// scalastyle:on println
305+
}
306+
}
307+
308+
/**
309+
* Prints the physical plan to the console for debugging purposes.
310+
* @since 1.3.0
311+
*/
312+
override def explain(): Unit = explain(extended = false)
313+
285314
/**
286315
* Returns all column names and their data types as an array.
287316
* @group basic

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 74 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ import org.apache.spark.util.Utils
6161
*/
6262
@Experimental
6363
class Dataset[T] private[sql](
64-
@transient val sqlContext: SQLContext,
65-
@transient val queryExecution: QueryExecution,
64+
@transient override val sqlContext: SQLContext,
65+
@transient override val queryExecution: QueryExecution,
6666
tEncoder: Encoder[T]) extends Queryable with Serializable {
6767

6868
/**
@@ -85,7 +85,25 @@ class Dataset[T] private[sql](
8585
* Returns the schema of the encoded form of the objects in this [[Dataset]].
8686
* @since 1.6.0
8787
*/
88-
def schema: StructType = resolvedTEncoder.schema
88+
override def schema: StructType = resolvedTEncoder.schema
89+
90+
/**
91+
* Prints the schema of the underlying [[DataFrame]] to the console in a nice tree format.
92+
* @since 1.6.0
93+
*/
94+
override def printSchema(): Unit = toDF().printSchema()
95+
96+
/**
97+
* Prints the plans (logical and physical) to the console for debugging purposes.
98+
* @since 1.6.0
99+
*/
100+
override def explain(extended: Boolean): Unit = toDF().explain(extended)
101+
102+
/**
103+
* Prints the physical plan to the console for debugging purposes.
104+
* @since 1.6.0
105+
*/
106+
override def explain(): Unit = toDF().explain()
89107

90108
/* ************* *
91109
* Conversions *
@@ -152,6 +170,59 @@ class Dataset[T] private[sql](
152170
*/
153171
def count(): Long = toDF().count()
154172

173+
/**
174+
* Displays the content of this [[Dataset]] in a tabular form. Strings more than 20 characters
175+
* will be truncated, and all cells will be aligned right. For example:
176+
* {{{
177+
* year month AVG('Adj Close) MAX('Adj Close)
178+
* 1980 12 0.503218 0.595103
179+
* 1981 01 0.523289 0.570307
180+
* 1982 02 0.436504 0.475256
181+
* 1983 03 0.410516 0.442194
182+
* 1984 04 0.450090 0.483521
183+
* }}}
184+
* @param numRows Number of rows to show
185+
*
186+
* @since 1.6.0
187+
*/
188+
def show(numRows: Int): Unit = show(numRows, truncate = true)
189+
190+
/**
191+
* Displays the top 20 rows of [[DataFrame]] in a tabular form. Strings more than 20 characters
192+
* will be truncated, and all cells will be aligned right.
193+
*
194+
* @since 1.6.0
195+
*/
196+
def show(): Unit = show(20)
197+
198+
/**
199+
* Displays the top 20 rows of [[DataFrame]] in a tabular form.
200+
*
201+
* @param truncate Whether truncate long strings. If true, strings more than 20 characters will
202+
* be truncated and all cells will be aligned right
203+
*
204+
* @since 1.6.0
205+
*/
206+
def show(truncate: Boolean): Unit = show(20, truncate)
207+
208+
/**
209+
* Displays the [[DataFrame]] in a tabular form. For example:
210+
* {{{
211+
* year month AVG('Adj Close) MAX('Adj Close)
212+
* 1980 12 0.503218 0.595103
213+
* 1981 01 0.523289 0.570307
214+
* 1982 02 0.436504 0.475256
215+
* 1983 03 0.410516 0.442194
216+
* 1984 04 0.450090 0.483521
217+
* }}}
218+
* @param numRows Number of rows to show
219+
* @param truncate Whether truncate long strings. If true, strings more than 20 characters will
220+
* be truncated and all cells will be aligned right
221+
*
222+
* @since 1.6.0
223+
*/
224+
def show(numRows: Int, truncate: Boolean): Unit = toDF().show(numRows, truncate)
225+
155226
/**
156227
* Returns a new [[Dataset]] that has exactly `numPartitions` partitions.
157228
* @since 1.6.0

sql/core/src/main/scala/org/apache/spark/sql/execution/Queryable.scala

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@
1717

1818
package org.apache.spark.sql.execution
1919

20+
import scala.util.control.NonFatal
21+
2022
import org.apache.spark.sql.SQLContext
2123
import org.apache.spark.sql.types.StructType
2224

23-
import scala.util.control.NonFatal
24-
2525
/** A trait that holds shared code between DataFrames and Datasets. */
2626
private[sql] trait Queryable {
2727
def schema: StructType
@@ -37,31 +37,9 @@ private[sql] trait Queryable {
3737
}
3838
}
3939

40-
/**
41-
* Prints the schema to the console in a nice tree format.
42-
* @group basic
43-
* @since 1.3.0
44-
*/
45-
// scalastyle:off println
46-
def printSchema(): Unit = println(schema.treeString)
47-
// scalastyle:on println
40+
def printSchema(): Unit
4841

49-
/**
50-
* Prints the plans (logical and physical) to the console for debugging purposes.
51-
* @since 1.3.0
52-
*/
53-
def explain(extended: Boolean): Unit = {
54-
val explain = ExplainCommand(queryExecution.logical, extended = extended)
55-
sqlContext.executePlan(explain).executedPlan.executeCollect().foreach {
56-
// scalastyle:off println
57-
r => println(r.getString(0))
58-
// scalastyle:on println
59-
}
60-
}
42+
def explain(extended: Boolean): Unit
6143

62-
/**
63-
* Only prints the physical plan to the console for debugging purposes.
64-
* @since 1.3.0
65-
*/
66-
def explain(): Unit = explain(extended = false)
44+
def explain(): Unit
6745
}

0 commit comments

Comments (0)