Commit 062163b

wangyum authored and GitHub Enterprise committed

[CARMEL-7348][CARMEL-5673] Track the Memory Usage of Large Objects in Driver (apache#126)

1 parent b8dccff · commit 062163b

8 files changed: 184 additions & 6 deletions

core/src/main/scala/org/apache/spark/ContextCleaner.scala

Lines changed: 38 additions & 0 deletions

@@ -46,6 +46,7 @@ private case class CleanAccum(accId: Long) extends CleanupTask
 private case class CleanCheckpoint(rddId: Int) extends CleanupTask
 private case class CleanSparkListener(listener: SparkListener) extends CleanupTask
 private case class CleanSpilledPartitionResult(file: File) extends CleanupTask
+private case class CleanFileScanRDD(rddId: Int) extends CleanupTask

 /**
  * A WeakReference associated with a CleanupTask.
@@ -70,6 +71,8 @@ private[spark] class ContextCleaner(
     sc: SparkContext,
     shuffleDriverComponents: ShuffleDriverComponents) extends Logging {

+  protected val trackFileScanRddClean = sc.conf.get(CLEANER_REFERENCE_TRACKING_CLEAN_FILE_SCAN_RDD)
+
   private val periodicGCService: ScheduledExecutorService =
     ThreadUtils.newDaemonSingleThreadScheduledExecutor("context-cleaner-periodic-gc")

@@ -117,6 +120,8 @@ private[spark] class ContextCleaner(
     new CleanSparkListenerCleanupWorker(sc))
   contextCleanupWorkers.put(classOf[CleanSpilledPartitionResult].getName,
     new SpilledPartitionResultCleanupWorker(sc))
+  contextCleanupWorkers.put(classOf[CleanFileScanRDD].getName,
+    new FileScanRDDCleanupWorker(sc))

   contextCleanupWorkers.asScala.foreach(_._2.start())

@@ -138,6 +143,18 @@ private[spark] class ContextCleaner(
     registerForCleanup(rdd, CleanRDD(rdd.id))
   }

+  /** Register a FileScanRDD for cleanup when it is garbage collected. */
+  def registerFileScanRDDForCleanup(rdd: RDD[_]): Unit = {
+    if (trackFileScanRddClean) {
+      try {
+        registerForCleanup(rdd, CleanFileScanRDD(rdd.id))
+      } catch {
+        case t: Throwable =>
+          logError(s"Failed to register file scan rdd ${rdd.id}", t)
+      }
+    }
+  }
+
   def registerAccumulatorForCleanup(a: AccumulatorV2[_, _]): Unit = {
     registerForCleanup(a, CleanAccum(a.id))
   }
@@ -315,6 +332,15 @@ abstract private[spark] class ContextCleanupWorker(sc: SparkContext, name: String)
   }

   def referenceBufferSize(): Int = referenceBuffer.size()
+
+  def visit(f: AnyRef => Unit): Unit = {
+    referenceBuffer.forEach(r => {
+      val referent = r.get()
+      if (referent != null) {
+        f(referent)
+      }
+    })
+  }
 }

 private[spark] class BroadcastCleanupWorker(
@@ -510,3 +536,15 @@ private[spark] class SpilledPartitionResultCleanupWorker(sc: SparkContext)
     }
   }
 }
+
+private[spark] class FileScanRDDCleanupWorker(sc: SparkContext)
+  extends ContextCleanupWorker(sc, classOf[CleanFileScanRDD].getName) with Logging {
+
+  override def doCleanup(task: CleanupTask): Unit = {
+    task match {
+      case CleanFileScanRDD(rddId) =>
+        // Noop
+      case _ =>
+    }
+  }
+}
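
The new visit hook exposes each worker's still-live referents without draining the buffer. Below is a minimal sketch (not part of the commit) of how a caller could size the tracked FileScanRDDs with it; estimateTrackedFileScanRdds is a hypothetical helper, and since the worker types are private[spark], the sketch assumes it lives under the org.apache.spark package. FileScanRDD, SizeEstimator, and filePartitions come from the other files in this commit.

import org.apache.spark.sql.execution.datasources.FileScanRDD
import org.apache.spark.util.SizeEstimator

// Hypothetical helper: walks a worker's weak-reference buffer and sums the
// estimated size of the FilePartition metadata each live FileScanRDD retains.
def estimateTrackedFileScanRdds(worker: ContextCleanupWorker): (Long, Long) = {
  var count = 0L
  var bytes = 0L
  worker.visit {
    case rdd: FileScanRDD =>
      bytes += SizeEstimator.estimate(rdd.filePartitions)
      count += 1
    case _ => // a referent of another tracked type; ignore it
  }
  (count, bytes)
}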

core/src/main/scala/org/apache/spark/MapOutputTracker.scala

Lines changed: 6 additions & 0 deletions

@@ -677,6 +677,8 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
   def unregisterShuffle(shuffleId: Int): Unit

   def stop(): Unit = {}
+
+  def shuffleStatusesEstimatedSize(): (Int, Long) = (0, 0L)
 }

 /**
@@ -1272,6 +1274,10 @@ private[spark] class MapOutputTrackerMaster(
   }

   def mapOutputRequestQueued: Int = 0
+
+  override def shuffleStatusesEstimatedSize(): (Int, Long) = {
+    (shuffleStatuses.size, SizeEstimator.estimate(shuffleStatuses))
+  }
 }

 /**
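
Only the driver-side MapOutputTrackerMaster overrides the new hook; the worker-side tracker inherits the (0, 0L) default. A hedged usage sketch, assuming it runs on the driver of a live application:

import org.apache.spark.SparkEnv

// On the driver this returns (registered shuffles, estimated bytes held by
// their ShuffleStatus map); on an executor the base default (0, 0L) comes back.
val (shuffleCount, shuffleBytes) =
  SparkEnv.get.mapOutputTracker.shuffleStatusesEstimatedSize()
println(s"$shuffleCount shuffles retain ~$shuffleBytes bytes on the driver")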

core/src/main/scala/org/apache/spark/internal/config/package.scala

Lines changed: 6 additions & 0 deletions

@@ -1823,6 +1823,12 @@ package object config {
     .booleanConf
     .createWithDefault(false)

+  private[spark] val CLEANER_REFERENCE_TRACKING_CLEAN_FILE_SCAN_RDD =
+    ConfigBuilder("spark.cleaner.referenceTracking.cleanFileScanRDD")
+      .version("3.5.0")
+      .booleanConf
+      .createWithDefault(false)
+
   private[spark] val EXECUTOR_LOGS_ROLLING_STRATEGY =
     ConfigBuilder("spark.executor.logs.rolling.strategy")
       .version("1.1.0")
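
FileScanRDD tracking is off by default, so it is opt-in per application. A sketch of enabling it through the standard session builder (the application name is illustrative):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("driver-memory-tracking-demo")  // illustrative name
  // Enables weak-reference tracking of FileScanRDDs in the ContextCleaner.
  .config("spark.cleaner.referenceTracking.cleanFileScanRDD", "true")
  .getOrCreate()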

core/src/main/scala/org/apache/spark/scheduler/AnalyticsTaskSchedulerImpl.scala

Lines changed: 3 additions & 0 deletions

@@ -680,6 +680,9 @@ private[spark] class AnalyticsTaskSchedulerImpl(
         if (TaskState.isFinished(state)) {
           cleanupTaskState(tid)
           taskSet.runningTasksReadyToUpdate.getAndIncrement()
+          if (serializedData != null && serializedData.limit() > 0) {
+            taskSet.totalResultInMemorySize.addAndGet(serializedData.limit())
+          }
           if (state == TaskState.FINISHED) {
             taskResultGetter.enqueueSuccessfulTask(taskSet, tid, serializedData)
           } else if (Set(TaskState.FAILED, TaskState.KILLED, TaskState.LOST).contains(state)) {

core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala

Lines changed: 3 additions & 0 deletions

@@ -842,6 +842,9 @@ private[spark] class TaskSchedulerImpl(
         if (TaskState.isFinished(state)) {
           cleanupTaskState(tid)
           taskSet.runningTasksReadyToUpdate.getAndIncrement()
+          if (serializedData != null && serializedData.limit() > 0) {
+            taskSet.totalResultInMemorySize.addAndGet(serializedData.limit())
+          }
           taskSet.removeRunningTask(tid)
           if (state == TaskState.FINISHED) {
             taskResultGetter.enqueueSuccessfulTask(taskSet, tid, serializedData,
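
Both scheduler variants make the same change: every finished task's serialized result size is folded into the task set's running total before the result is handed to the TaskResultGetter. A standalone sketch of that accounting, assuming totalResultInMemorySize is an AtomicLong on the task set manager:

import java.nio.ByteBuffer
import java.util.concurrent.atomic.AtomicLong

val totalResultInMemorySize = new AtomicLong(0L)  // stand-in for the field

def recordFinishedTaskResult(serializedData: ByteBuffer): Unit = {
  // limit() is the readable byte count of the result buffer; empty or
  // missing buffers add nothing to the total.
  if (serializedData != null && serializedData.limit() > 0) {
    totalResultInMemorySize.addAndGet(serializedData.limit().toLong)
  }
}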

core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala

Lines changed: 5 additions & 1 deletion

@@ -39,7 +39,7 @@ import org.apache.spark.scheduler._
 import org.apache.spark.scheduler.cluster.{CoarseGrainedClusterMessages, CoarseGrainedSchedulerBackend}
 import org.apache.spark.shuffle.ShuffleManager
 import org.apache.spark.storage.BlockManagerMessages._
-import org.apache.spark.util.{RpcUtils, ThreadUtils, Utils}
+import org.apache.spark.util.{RpcUtils, SizeEstimator, ThreadUtils, Utils}

 /**
  * BlockManagerMasterEndpoint is an [[IsolatedThreadSafeRpcEndpoint]] on the master node to
@@ -956,6 +956,10 @@ class BlockManagerMasterEndpoint(
   }

   def askQueued: Int = ThreadUtils.queuedSize(askThreadPool)
+
+  def blockManagerInfoEstimatedSize(): (Int, Long) = {
+    (blockManagerInfo.size, SizeEstimator.estimate(blockManagerInfo))
+  }
 }

 @DeveloperApi
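
SizeEstimator.estimate walks the object graph reflectively, so blockManagerInfoEstimatedSize costs time proportional to the number of registered block managers and blocks; it suits an on-demand diagnostic call, not a hot path. A self-contained sketch of the same API on a synthetic map:

import scala.collection.mutable
import org.apache.spark.util.SizeEstimator

// Synthetic stand-in for blockManagerInfo: the estimate grows with entry
// count because the whole object graph is traversed.
val info = mutable.HashMap(
  "bm-1" -> Array.fill(1024)(0L),
  "bm-2" -> Array.fill(2048)(0L))
println(s"${info.size} entries, ~${SizeEstimator.estimate(info)} bytes")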

sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala

Lines changed: 6 additions & 2 deletions

@@ -660,10 +660,12 @@ case class FileSourceScanExec(
       }
     }

-    new FileScanRDD(relation.sparkSession, readFile, filePartitions,
+    val fileScanRDD = new FileScanRDD(relation.sparkSession, readFile, filePartitions,
       new StructType(requiredSchema.fields ++ relation.partitionSchema.fields),
       fileConstantMetadataColumns, relation.fileFormat.fileConstantMetadataExtractors,
       new FileSourceOptions(CaseInsensitiveMap(relation.options)))
+    Option(session).foreach(_.sparkContext.cleaner.foreach(_.registerFileScanRDDForCleanup(fileScanRDD)))
+    fileScanRDD
   }

  /**
@@ -713,10 +715,12 @@ case class FileSourceScanExec(
     val partitions =
       FilePartition.getFilePartitions(relation.sparkSession, splitFiles, maxSplitBytes)

-    new FileScanRDD(relation.sparkSession, readFile, partitions,
+    val fileScanRDD = new FileScanRDD(relation.sparkSession, readFile, partitions,
       new StructType(requiredSchema.fields ++ relation.partitionSchema.fields),
       fileConstantMetadataColumns, relation.fileFormat.fileConstantMetadataExtractors,
       new FileSourceOptions(CaseInsensitiveMap(relation.options)))
+    Option(session).foreach(_.sparkContext.cleaner.foreach(_.registerFileScanRDDForCleanup(fileScanRDD)))
+    fileScanRDD
   }

   // Filters unused DynamicPruningExpression expressions - one which has been replaced
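
Because registration stores only a WeakReference (see the ContextCleaner changes above), tracking a FileScanRDD never extends its lifetime: once the query plan drops its strong reference, the referent clears and visit() skips the entry. A minimal sketch of that semantics, noting that System.gc() is only a request:

import java.lang.ref.WeakReference

var payload: AnyRef = new Array[Byte](8 * 1024 * 1024)
val ref = new WeakReference(payload)
payload = null  // drop the only strong reference
System.gc()     // a request, not a guarantee, so the outcome may vary
println(if (ref.get() == null) "referent collected" else "still reachable")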

sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala

Lines changed: 117 additions & 3 deletions

@@ -24,16 +24,21 @@ import javax.ws.rs.core.MediaType

 import scala.util.{Failure, Success, Try}

+import com.fasterxml.jackson.databind.node.{JsonNodeFactory, ObjectNode}
 import com.google.common.cache.{Cache, CacheBuilder}

-import org.apache.spark.{JobExecutionStatus, SparkEnv}
+import org.apache.spark.{CleanFileScanRDD, JobExecutionStatus, SparkContext, SparkEnv}
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config.CLEANER_REFERENCE_TRACKING_CLEAN_FILE_SCAN_RDD
+import org.apache.spark.scheduler.TaskSummary
 import org.apache.spark.sql.execution.BroadcastRelationManager
 import org.apache.spark.sql.execution.BroadcastRelationManager.BroadcastRelationInfo
-import org.apache.spark.sql.execution.ui.{SparkPlanGraph, SparkPlanGraphCluster, SparkPlanGraphNode, SQLAppStatusStore, SQLExecutionUIData}
+import org.apache.spark.sql.execution.datasources.FileScanRDD
+import org.apache.spark.sql.execution.ui.{SQLAppStatusStore, _}
 import org.apache.spark.status.AppStatusStore
 import org.apache.spark.status.api.v1.{BaseAppResource, NotFoundException, StageData, StageStatus}
-import org.apache.spark.util.ThreadUtils
+import org.apache.spark.storage.{BlockManagerMaster, BlockManagerMasterEndpoint}
+import org.apache.spark.util.{SizeEstimator, ThreadUtils}

 @Produces(Array(MediaType.APPLICATION_JSON))
 private[v1] class SqlResource extends BaseAppResource with Logging {
@@ -98,6 +103,115 @@ private[v1] class SqlResource extends BaseAppResource with Logging {
     }
   }

+  @GET
+  @Path("summary")
+  def memorySummary(
+      @DefaultValue("true") @QueryParam("shuffle") includeShuffle: Boolean,
+      @DefaultValue("true") @QueryParam("broadcast") includeBroadcast: Boolean,
+      @DefaultValue("true") @QueryParam("task") includeTask: Boolean,
+      @DefaultValue("true") @QueryParam("fileScanRDD") includeFileScanRDD: Boolean,
+      @DefaultValue("true") @QueryParam("block") includeBlockManagerInfo: Boolean): ObjectNode = {
+    logInfo("Received request for summary")
+
+    val res = new ObjectNode(JsonNodeFactory.instance)
+    try {
+      if (includeShuffle) {
+        val shuffleStatus = SparkEnv.get.mapOutputTracker.shuffleStatusesEstimatedSize()
+
+        val shuffle = new ObjectNode(JsonNodeFactory.instance)
+        shuffle.put("count", shuffleStatus._1)
+        shuffle.put("estimatedSize", shuffleStatus._2)
+        res.set("shuffleStatus", shuffle)
+      }
+
+      if (includeBroadcast) {
+        val count = BroadcastRelationManager.allInMemBroadcastRelations().size
+        val relationsSize = BroadcastRelationManager.allInMemBroadcastRelations().
+          map(_.size).sum
+
+        val broadcast = new ObjectNode(JsonNodeFactory.instance)
+        broadcast.put("count", count)
+        broadcast.put("relationsSize", relationsSize)
+        res.set("broadcastRelation", broadcast)
+      }
+
+      SparkContext.getActive.foreach(sc => {
+        if (includeTask) {
+          val taskSummary = sc.taskScheduler.taskSummary()
+          res.set("task", taskSummaryToJson(taskSummary))
+        }
+
+        if (includeFileScanRDD) {
+          var count = 0L
+          var estimatedSize = 0L
+          try {
+            val contextCleanupWorker = sc.cleaner.get.
+              getContextCleanupWorker(classOf[CleanFileScanRDD].getName)
+
+            if (sc.conf.get(CLEANER_REFERENCE_TRACKING_CLEAN_FILE_SCAN_RDD)) {
+              contextCleanupWorker.visit(r => {
+                if (r != null && r.isInstanceOf[FileScanRDD]) {
+                  estimatedSize += SizeEstimator.estimate(
+                    r.asInstanceOf[FileScanRDD].filePartitions)
+                  count += 1
+                }
+              })
+            }
+          } catch {
+            case t: Throwable =>
+              logError("Failed to get fileScanRDDSize", t)
+          } finally {
+            val fileScanRDD = new ObjectNode(JsonNodeFactory.instance)
+            fileScanRDD.put("count", count)
+            fileScanRDD.put("estimatedSize", estimatedSize)
+            res.set("fileScanRDD", fileScanRDD)
+          }
+        }
+      })

+      if (includeBlockManagerInfo) {
+        try {
+          val endpoint = SparkEnv.get.rpcEnv.getEndpoint(BlockManagerMaster.DRIVER_ENDPOINT_NAME)
+          if (endpoint != null) {
+            val (count, estimatedSize) = endpoint.
+              asInstanceOf[BlockManagerMasterEndpoint].blockManagerInfoEstimatedSize()
+
+            val blockManagerInfo = new ObjectNode(JsonNodeFactory.instance)
+            blockManagerInfo.put("count", count)
+            blockManagerInfo.put("estimatedSize", estimatedSize)
+            res.set("blockManagerInfo", blockManagerInfo)
+          }
+        } catch {
+          case t: Throwable =>
+            logError("Failed to get blockManagerInfoEstimatedSize", t)
+        }
+      }
+
+      res.put("valid", true)
+    } catch {
+      case e: Exception =>
+        res.put("valid", false)
+        res.put("message", e.getMessage)
+    }
+    logInfo(s"driver summary: ${res}")
+    res
+  }
+
+  def taskSummaryToJson(s: TaskSummary): ObjectNode = {
+    val res = new ObjectNode(JsonNodeFactory.instance)
+    res.put("totalTasks", s.totalTasks)
+    res.put("runningTasks", s.runningTasks)
+    res.put("successfulTasks", s.successfulTasks)
+    res.put("zombieTasks", s.zombieTasks)
+    res.put("zombieTaskSets", s.zombieTaskSets)
+    res.put("activeTaskSets", s.activeTaskSets)
+    res.put("freeCores", s.freeCores)
+    res.put("runningTasksReadyToUpdate", s.runningTasksReadyToUpdate)
+    res.put("totalResultSize", s.totalResultSize)
+    res.put("totalResultInMemorySize", s.totalResultInMemorySize)
+    res
+  }
+
   @GET
   @Path("context-cleaner")
   def contextCleaner(): Seq[(String, Int)] = {
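
A hedged example of querying the new endpoint. The exact mount point depends on how this fork registers SqlResource; the path below assumes the stock /api/v1/applications/{appId}/sql prefix, and the host, port, and application id are illustrative. Each boolean query parameter defaults to true and disables its section when set to false:

import java.net.URI
import java.net.http.{HttpClient, HttpRequest, HttpResponse}

// Assumed URL layout; adjust host, port, and appId for the deployment.
val url = "http://localhost:4040/api/v1/applications/app-0001/sql/summary?task=false"
val response = HttpClient.newHttpClient().send(
  HttpRequest.newBuilder(URI.create(url)).GET().build(),
  HttpResponse.BodyHandlers.ofString())
// Expected shape: {"shuffleStatus":{...},"broadcastRelation":{...},
//                  "fileScanRDD":{...},"blockManagerInfo":{...},"valid":true}
println(response.body())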
