@@ -324,6 +324,10 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
    val app = try {
      load(appId)
    } catch {
      case _: NoSuchElementException if this.conf.get(ON_DEMAND_ENABLED) =>
        val name = Utils.nameForAppAndAttempt(appId, attemptId)
        loadFromFallbackLocation(appId, attemptId,
          RollingEventLogFilesWriter.EVENT_LOG_DIR_NAME_PREFIX + name)
mridulm (Contributor) commented on Jul 22, 2025:

We should not assume it will be RollingEventLogFilesWriter; users don't need to be running with the default enabled, right?
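
For context, a minimal sketch of the layout assumption under discussion; the names below are illustrative, and the single-file layout is recalled from Spark's event log conventions rather than taken from this diff:

  // Illustrative sketch, not the PR's code. Rolling event logs live in a
  // directory named with the "eventlog_v2_" prefix, while a single-file event
  // log is a plain file named after the app, which this fallback would miss:
  //   rolling:     <logDir>/eventlog_v2_<appId>[_<attemptId>]/   (directory)
  //   single-file: <logDir>/<appId>[_<attemptId>]                (one file)
  val appId = "app-20250722120000-0001"                   // hypothetical app id
  val attemptId: Option[String] = None
  val name = appId + attemptId.map("_" + _).getOrElse("") // like Utils.nameForAppAndAttempt
  val fallbackDir = "eventlog_v2_" + name                 // EVENT_LOG_DIR_NAME_PREFIX + name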

      case _: NoSuchElementException =>
        return None
    }
@@ -364,6 +368,16 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
    Some(loadedUI)
  }

  private def loadFromFallbackLocation(appId: String, attemptId: Option[String], logPath: String)
      : ApplicationInfoWrapper = {
    val date = new Date(0)
    val info = ApplicationAttemptInfo(attemptId, date, date, date, 0, "spark", false, "unknown")
    addListing(new ApplicationInfoWrapper(
      ApplicationInfo(appId, appId, None, None, None, None, List.empty),
viirya (Member) commented:

So supposedly, the appId should be correct to load the record, but the other info is dummy?

viirya (Member) commented:

And once periodic scanning happens, it will update the record with the correct information?

dongjoon-hyun (Member, Author) commented:

Yes, correct, @viirya ~

dongjoon-hyun (Member, Author) commented:

This is a kind of placeholder.

      List(new AttemptInfoWrapper(info, logPath, 0, Some(1), None, None, None, None))))
Contributor commented:

Shouldn't we rely on the event log for information like startTime, endTime, user, etc.? Will this not lead to incorrect information being displayed on the home page of the SHS?

dongjoon-hyun (Member, Author) commented:

This is only a dummy placeholder that allows the SHS to show the application logs before periodic scanning happens. The periodic scanning will keep it in sync.

BTW, how often do you think this fallback is used in production environments, @thejdeep? I'm curious whether you are thinking about turning off the periodic scanning.

thejdeep (Contributor) commented:

Oh, I see that the intention is just to have dummy placeholders until the scanning takes care of it.

If users operate a large Spark cluster, my two cents are that users may tend to access their apps on demand much more frequently, and it might just lead to an incorrect listing page. For example, we noticed that a good fraction of our SHS requests are on demand, since users would like to get their reports as soon as their app finishes and before checkForLogs completes.

dongjoon-hyun (Member, Author) commented on Jul 22, 2025:

> an incorrect listing page

Yes, and technically it's not exposed in the listing page. Could you build this PR and test it yourself?

dongjoon-hyun (Member, Author) commented on Jul 22, 2025:

> For example, we noticed that a good fraction of our SHS requests are on demand, since users would like to get their reports as soon as their app finishes and before checkForLogs completes.

It sounds like a limitation of single-file event logs, @thejdeep. If you have rolling event logs, the SHS already has the correct partial information while your jobs are running.
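
For readers following along, a minimal sketch of the application-side settings this refers to; both keys are existing Spark configs, and the values are illustrative:

  import org.apache.spark.SparkConf

  // Enable rolling event logs so the SHS can serve partial information from
  // already-rolled files while the application is still running.
  val conf = new SparkConf()
    .set("spark.eventLog.enabled", "true")
    .set("spark.eventLog.rolling.enabled", "true")     // the default in Spark 4 (SPARK-45771)
    .set("spark.eventLog.rolling.maxFileSize", "128m") // roll to a new file at this size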

dongjoon-hyun (Member, Author) commented on Jul 22, 2025:

Just questions to understand your use cases:

  • How do you handle Spark Streaming jobs with a single-file event log? Do your jobs still not use rolling event logs?
  • Are you assuming only Spark 2.x or 3.x jobs, given that Spark 4 jobs generate rolling event logs by default since SPARK-45771?

thejdeep (Contributor) commented:

Thanks for sharing the context, @dongjoon-hyun.

We currently do not use rolling event logs, since we only serve batch use cases at the moment. All applications are currently on 3.x.

I can build your PR locally and test it on single-file event logs to see how it works with listing and cleanup. I can get back to you by tomorrow at the earliest, if that works.

dongjoon-hyun (Member, Author) commented on Jul 22, 2025:

Thank you so much for the info and your efforts on reviewing this. Take your time.

Contributor commented:

@dongjoon-hyun, I wanted to get your thoughts on #51604 (comment).

Thank you!

      load(appId)
Contributor commented:

What is the behavior if the application does not exist (a typo in the user's query, for example)? Will the listing now have an invalid entry?

thejdeep (Contributor) commented:

+1. Do you think it would be better if we checked for the existence of the file at its location before adding an entry? This would keep parity with how checkForLogs works: we only add entries whose event log locations exist.

dongjoon-hyun (Member, Author) commented:

No, we had better avoid that because it requires the full path, including "s3://...", @thejdeep.

  }

  override def getEmptyListingHtml(): Seq[Node] = {
    <p>
      Did you specify the correct logging directory? Please verify your setting of
@@ -54,6 +54,12 @@ private[spark] object History {
    .checkValue(v => v > 0, "The update batchSize should be a positive integer.")
    .createWithDefault(Int.MaxValue)

  val ON_DEMAND_ENABLED = ConfigBuilder("spark.history.fs.update.onDemandEnabled")
    .version("4.1.0")
    .doc("Whether to look up rolling event log locations in an on-demand manner before listing files.")
    .booleanConf
    .createWithDefault(true)

  val CLEANER_ENABLED = ConfigBuilder("spark.history.fs.cleaner.enabled")
    .version("1.4.0")
    .doc("Whether the History Server should periodically clean up event logs from storage")
@@ -1640,6 +1640,31 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P
    }
  }

test("SPARK-52914: Support spark.history.fs.update.onDemandEnabled") {
Seq(true, false).foreach { onDemandEnabled =>
withTempDir { dir =>
val conf = createTestConf(true)
conf.set(HISTORY_LOG_DIR, dir.getAbsolutePath)
conf.set(ON_DEMAND_ENABLED, onDemandEnabled)
val hadoopConf = SparkHadoopUtil.newConfiguration(conf)
val provider = new FsHistoryProvider(conf)

val writer1 = new RollingEventLogFilesWriter("app1", None, dir.toURI, conf, hadoopConf)
writer1.start()
writeEventsToRollingWriter(writer1, Seq(
SparkListenerApplicationStart("app1", Some("app1"), 0, "user", None),
SparkListenerJobStart(1, 0, Seq.empty)), rollFile = false)
writer1.stop()

assert(provider.getListing().length === 0)
assert(dir.listFiles().length === 1)
dongjoon-hyun (Member, Author) commented:

This is the test coverage, @thejdeep.

        assert(provider.getAppUI("app1", None).isDefined == onDemandEnabled)

        provider.stop()
      }
    }
  }

test("SPARK-36354: EventLogFileReader should skip rolling event log directories with no logs") {
withTempDir { dir =>
val conf = createTestConf(true)