File tree Expand file tree Collapse file tree
catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical
core/src/main/scala/org/apache/spark/sql
hive/src/main/scala/org/apache/spark/sql/hive Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -26,18 +26,17 @@ import org.apache.spark.sql.catalyst.trees
2626abstract class LogicalPlan extends QueryPlan [LogicalPlan ] {
2727 self : Product =>
2828
29- // TODO: make a case class?
3029 /**
3130 * Estimates of various statistics. The default estimation logic simply sums up the corresponding
3231 * statistic produced by the children. To override this behavior, override `statistics` and
3332 * assign it an overridden version of `Statistics`.
3433 */
35- protected class Statistics {
36- lazy val childrenStats = children.map(_.statistics)
37- lazy val numTuples : Long = childrenStats.map(_.numTuples).sum
38- lazy val sizeInBytes : Long = childrenStats.map(_.sizeInBytes).sum
39- }
34+ case class Statistics (
35+ numTuples : Long = childrenStats.map(_.numTuples).sum,
36+ sizeInBytes : Long = childrenStats.map(_.sizeInBytes).sum
37+ )
4038 lazy val statistics : Statistics = new Statistics
39+ lazy val childrenStats = children.map(_.statistics)
4140
4241 /**
4342 * Returns the set of attributes that are referenced by this node
Original file line number Diff line number Diff line change @@ -81,19 +81,19 @@ case class SparkLogicalPlan(alreadyPlanned: SparkPlan)
8181 }).asInstanceOf [this .type ]
8282 }
8383
84- override lazy val statistics = new Statistics {
84+ @ transient override lazy val statistics = Statistics (
8585 // If this is wrapping around ExistingRdd and no reasonable estimation logic is implemented,
8686 // return a default value.
87- override lazy val sizeInBytes : Long = {
87+ sizeInBytes = {
8888 val defaultSum = childrenStats.map(_.sizeInBytes).sum
8989 alreadyPlanned match {
9090 // TODO: Instead of returning a default value here, find a way to return a meaningful
91- // estimate for RDDs. See PR 1238 for more discussions.
91+ // size estimate for RDDs. See PR 1238 for more discussions.
9292 case e : ExistingRdd if defaultSum == 0 => statsDefaultSizeInBytes
9393 case _ => defaultSum
9494 }
9595 }
96- }
96+ )
9797
9898}
9999
Original file line number Diff line number Diff line change @@ -53,14 +53,14 @@ private[sql] case class ParquetRelation(
5353
5454 self : Product =>
5555
56- @ transient override lazy val statistics = new Statistics {
56+ @ transient override lazy val statistics = Statistics (
5757 // TODO: investigate getting encoded column statistics in the parquet file?
58- override lazy val sizeInBytes : Long = {
58+ sizeInBytes = {
5959 val hdfsPath = new Path (path)
6060 val fs = hdfsPath.getFileSystem(conf.getOrElse(ContextUtil .getConfiguration(new Job ())))
6161 fs.getContentSummary(hdfsPath).getLength // TODO: in bytes or system-dependent?
6262 }
63- }
63+ )
6464
6565 /** Schema derived from ParquetFile */
6666 def parquetSchema : MessageType =
Original file line number Diff line number Diff line change @@ -273,7 +273,7 @@ private[hive] case class MetastoreRelation
273273 @ transient override lazy val statistics = new Statistics {
274274 // TODO: check if this estimate is valid for tables after partition pruning.
275275 // Size getters adapted from SizeBasedBigTableSelectorForAutoSMJ.java in Hive (version 0.13).
276- override lazy val sizeInBytes : Long =
276+ override val sizeInBytes : Long =
277277 maybeGetSize(hiveConf, hiveQlTable.getProperty(" totalSize" ), path)
278278
279279 private [this ] def maybeGetSize (conf : HiveConf , size : String , path : Path ): Long = {
You can’t perform that action at this time.
0 commit comments