-
Notifications
You must be signed in to change notification settings - Fork 29k
SPARK-4705:[core] Write event logs of different application attempts to different files. #4845
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7484b9d
64a33d0
0762e86
cc9311e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -370,6 +370,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli | |
| taskScheduler.start() | ||
|
|
||
| val applicationId: String = taskScheduler.applicationId() | ||
| val applicationAttemptId: String = taskScheduler.applicationAttemptId() | ||
| conf.set("spark.app.id", applicationId) | ||
|
|
||
| env.blockManager.initialize(applicationId) | ||
|
|
@@ -385,8 +386,8 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli | |
| // Optionally log Spark events | ||
| private[spark] val eventLogger: Option[EventLoggingListener] = { | ||
| if (isEventLogEnabled) { | ||
| val logger = | ||
| new EventLoggingListener(applicationId, eventLogDir.get, conf, hadoopConfiguration) | ||
| val logger = new EventLoggingListener( | ||
| applicationId, applicationAttemptId, eventLogDir.get, conf, hadoopConfiguration) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: indented too far |
||
| logger.start() | ||
| listenerBus.addListener(logger) | ||
| Some(logger) | ||
|
|
@@ -1735,7 +1736,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli | |
| // Note: this code assumes that the task scheduler has been initialized and has contacted | ||
| // the cluster manager to get an application ID (in case the cluster manager provides one). | ||
| listenerBus.post(SparkListenerApplicationStart(appName, Some(applicationId), | ||
| startTime, sparkUser)) | ||
| startTime, sparkUser, applicationAttemptId)) | ||
| } | ||
|
|
||
| /** Post the application end event */ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,7 +26,8 @@ private[spark] case class ApplicationHistoryInfo( | |
| endTime: Long, | ||
| lastUpdated: Long, | ||
| sparkUser: String, | ||
| completed: Boolean = false) | ||
| completed: Boolean = false, | ||
| appAttemptId: String = "") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| private[spark] abstract class ApplicationHistoryProvider { | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -208,10 +208,17 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis | |
| if (!logInfos.isEmpty) { | ||
| val newApps = new mutable.LinkedHashMap[String, FsApplicationHistoryInfo]() | ||
| def addIfAbsent(info: FsApplicationHistoryInfo) = { | ||
| if (!newApps.contains(info.id) || | ||
| newApps(info.id).logPath.endsWith(EventLoggingListener.IN_PROGRESS) && | ||
| val key = | ||
| if (info.appAttemptId.equals("")) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 to Sean's suggestion of using |
||
| info.id | ||
| } else { | ||
| info.id + "_" + info.appAttemptId | ||
| } | ||
|
|
||
| if (!newApps.contains(key) || | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Feels like you should add parentheses here somewhere to clarify in which order the conditions should be parsed, since you're mixing |
||
| newApps(key).logPath.endsWith(EventLoggingListener.IN_PROGRESS) && | ||
| !info.logPath.endsWith(EventLoggingListener.IN_PROGRESS)) { | ||
| newApps += (info.id -> info) | ||
| newApps += (key -> info) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -309,7 +316,8 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis | |
| appListener.endTime.getOrElse(-1L), | ||
| getModificationTime(eventLog).get, | ||
| appListener.sparkUser.getOrElse(NOT_STARTED), | ||
| isApplicationCompleted(eventLog)) | ||
| isApplicationCompleted(eventLog), | ||
| appListener.appAttemptId.getOrElse("")) | ||
| } finally { | ||
| logInput.close() | ||
| } | ||
|
|
@@ -410,5 +418,7 @@ private class FsApplicationHistoryInfo( | |
| endTime: Long, | ||
| lastUpdated: Long, | ||
| sparkUser: String, | ||
| completed: Boolean = true) | ||
| extends ApplicationHistoryInfo(id, name, startTime, endTime, lastUpdated, sparkUser, completed) | ||
| completed: Boolean = true, | ||
| appAttemptId: String ="") | ||
| extends ApplicationHistoryInfo( | ||
| id, name, startTime, endTime, lastUpdated, sparkUser, completed, appAttemptId) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,9 @@ import javax.servlet.http.HttpServletRequest | |
| import scala.xml.Node | ||
|
|
||
| import org.apache.spark.ui.{WebUIPage, UIUtils} | ||
| import scala.collection.immutable.ListMap | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: group with other |
||
| import scala.collection.mutable.HashMap | ||
| import scala.collection.mutable.ArrayBuffer | ||
|
|
||
| private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { | ||
|
|
||
|
|
@@ -34,18 +37,31 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { | |
| val requestedIncomplete = | ||
| Option(request.getParameter("showIncomplete")).getOrElse("false").toBoolean | ||
|
|
||
| val allApps = parent.getApplicationList().filter(_.completed != requestedIncomplete) | ||
| val actualFirst = if (requestedFirst < allApps.size) requestedFirst else 0 | ||
| val apps = allApps.slice(actualFirst, Math.min(actualFirst + pageSize, allApps.size)) | ||
|
|
||
| val allCompletedAppsNAttempts = | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| parent.getApplicationList().filter(_.completed != requestedIncomplete) | ||
| val (hasAttemptInfo, appToAttemptMap) = getApplicationLevelList(allCompletedAppsNAttempts) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a little confused about this code, so let me suggest something different. Why not turn the listing into this: That should make it easier to handle apps with a single and multiple attempts with code that's mostly the same. To avoid exploding memory usage, the code that does this translation (from
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BTW with the map you could do:
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi, Suppose that at a particular time some event logs have attempt info and some do not. We then display the same number of columns regardless of whether a given event log has attempt info. To do that, we need just one hasAttemptInfo flag for all the applications we have, not one per application. If we use the map logic, we will need to iterate over the map again after creating it. Regarding the memory explosion: if we slice first, then at certain times (for example, when the history server is restarting with some event logs already present) we might see one page of the UI without attempt info and another page with it. Is that fine?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should slice on the applications, and then build the table with all the applications' attempts. So every page shows "x" applications, not attempts. After you apply the pagination logic, iterating is cheap ("x" is a small number), so calculating Perhaps a more efficient way of doing this would be to make I just think the code you currently have is a little confusing, and a little inefficient. If you have a really long list of applications, it would waste a lot of cycles reading through it and copying things that would just be thrown away.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to make my suggestions a little more concrete, I'm suggesting something like this:
So now you have a list of applications being returned, and each application has information about all its attempts. Now it's much easier to slice that list and choose what to render in
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi @vanzin , Thanks for adding the concrete suggestion. I will incorporate the same.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi @vanzin , If we change to this data structure, then the logic at getAppUI and getApplicationLevelList at FsHistoryProvider will get a bit more complicated. Also, if there is any extension written for ApplicationHistoryInfo, it will be impacted as well. As I understand it, what we are trying to achieve is to get the list itself as a mapping from application id => list of attempts. I would prefer a utility method that produces such a mapped list, rather than modifying the structure. The following is the basis for this (regarding the complication of FsHistoryProvider only):
Please consider these points, as compared to the utility method to get the same while creating HistoryPage.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, those methods will get a little more complicated. But without changing those, HistoryPage will have to implement pretty much the same transformation. I think doing that in HistoryPage is actually more complicated, and much less efficient. If you build the better data structure in FsHistoryProvider, it's done once. If you do it in HistoryPage, it's done for every client request. So you need to be doubly careful about memory and CPU usage, because for large application lists, copying the whole thing would be very expensive. The "level of indirection" you mention is a hashtable lookup, which is very cheap. That will get you the list of attempts for a particular application, and processing that list (which in the most common case will have a single entry) is rather easy. |
||
|
|
||
| val allAppsSize = allCompletedAppsNAttempts.size | ||
|
|
||
| val actualFirst = if (requestedFirst < allAppsSize) requestedFirst else 0 | ||
| val apps = | ||
| allCompletedAppsNAttempts.slice(actualFirst, Math.min(actualFirst + pageSize, allAppsSize)) | ||
| val appWithAttemptsDisplayList = | ||
| appToAttemptMap.slice(actualFirst, Math.min(actualFirst + pageSize, allAppsSize)) | ||
|
|
||
| val actualPage = (actualFirst / pageSize) + 1 | ||
| val last = Math.min(actualFirst + pageSize, allApps.size) - 1 | ||
| val pageCount = allApps.size / pageSize + (if (allApps.size % pageSize > 0) 1 else 0) | ||
| val last = Math.min(actualFirst + pageSize, allAppsSize) - 1 | ||
| val pageCount = allAppsSize / pageSize + (if (allAppsSize % pageSize > 0) 1 else 0) | ||
|
|
||
| val secondPageFromLeft = 2 | ||
| val secondPageFromRight = pageCount - 1 | ||
|
|
||
| val appTable = UIUtils.listingTable(appHeader, appRow, apps) | ||
| val appTable = | ||
| if (hasAttemptInfo) { | ||
| UIUtils.listingTable(appWithAttemptHeader, appWithAttemptRow, appWithAttemptsDisplayList) | ||
| } else { | ||
| UIUtils.listingTable(appHeader, appRow, apps) | ||
| } | ||
| val providerConfig = parent.getProviderConfig() | ||
| val content = | ||
| <div class="row-fluid"> | ||
|
|
@@ -59,15 +75,15 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { | |
| // to the first and last page. If the current page +/- `plusOrMinus` is greater | ||
| // than the 2nd page from the first page or less than the 2nd page from the last | ||
| // page, `...` will be displayed. | ||
| if (allApps.size > 0) { | ||
| if (allAppsSize > 0) { | ||
| val leftSideIndices = | ||
| rangeIndices(actualPage - plusOrMinus until actualPage, 1 < _, requestedIncomplete) | ||
| val rightSideIndices = | ||
| rangeIndices(actualPage + 1 to actualPage + plusOrMinus, _ < pageCount, | ||
| requestedIncomplete) | ||
|
|
||
| <h4> | ||
| Showing {actualFirst + 1}-{last + 1} of {allApps.size} | ||
| Showing {actualFirst + 1}-{last + 1} of {allAppsSize} | ||
| {if (requestedIncomplete) "(Incomplete applications)"} | ||
| <span style="float: right"> | ||
| { | ||
|
|
@@ -113,6 +129,36 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { | |
| </div> | ||
| UIUtils.basicSparkPage(content, "History Server") | ||
| } | ||
|
|
||
| private def getApplicationLevelList (appNattemptList: Iterable[ApplicationHistoryInfo]) ={ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. return type? |
||
| // Create HashMap as per the multiple attempts for one application. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Feels like this should be in a scaladoc comment. |
||
| // If there is no attempt specific stuff, then | ||
| // do return false, to indicate the same, so that previous UI gets displayed. | ||
| var hasAttemptInfo = false | ||
| val appToAttemptInfo = new HashMap[String, ArrayBuffer[ApplicationHistoryInfo]] | ||
| for( appAttempt <- appNattemptList) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All throughout the code: |
||
| if(!appAttempt.appAttemptId.equals("")){ | ||
| hasAttemptInfo = true | ||
| val attemptId = appAttempt.appAttemptId.toInt | ||
| if(appToAttemptInfo.contains(appAttempt.id)){ | ||
| val currentAttempts = appToAttemptInfo.get(appAttempt.id).get | ||
| currentAttempts += appAttempt | ||
| appToAttemptInfo.put( appAttempt.id, currentAttempts) | ||
| } else { | ||
| val currentAttempts = new ArrayBuffer[ApplicationHistoryInfo]() | ||
| currentAttempts += appAttempt | ||
| appToAttemptInfo.put( appAttempt.id, currentAttempts ) | ||
| } | ||
| }else { | ||
| val currentAttempts = new ArrayBuffer[ApplicationHistoryInfo]() | ||
| currentAttempts += appAttempt | ||
| appToAttemptInfo.put(appAttempt.id, currentAttempts) | ||
| } | ||
| } | ||
| val sortedMap = ListMap(appToAttemptInfo.toSeq.sortWith(_._1 > _._1):_*) | ||
| (hasAttemptInfo, sortedMap) | ||
| } | ||
|
|
||
|
|
||
| private val appHeader = Seq( | ||
| "App ID", | ||
|
|
@@ -128,6 +174,16 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { | |
| range.filter(condition).map(nextPage => | ||
| <a href={makePageLink(nextPage, showIncomplete)}> {nextPage} </a>) | ||
| } | ||
|
|
||
| private val appWithAttemptHeader = Seq( | ||
| "App ID", | ||
| "App Name", | ||
| "Attempt ID", | ||
| "Started", | ||
| "Completed", | ||
| "Duration", | ||
| "Spark User", | ||
| "Last Updated") | ||
|
|
||
| private def appRow(info: ApplicationHistoryInfo): Seq[Node] = { | ||
| val uiAddress = HistoryServer.UI_PATH_PREFIX + s"/${info.id}" | ||
|
|
@@ -146,6 +202,69 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { | |
| <td sorttable_customkey={info.lastUpdated.toString}>{lastUpdated}</td> | ||
| </tr> | ||
| } | ||
|
|
||
| private def getAttemptURI(attemptInfo: ApplicationHistoryInfo, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: style: |
||
| returnEmptyIfAttemptInfoNull: Boolean = true ) = { | ||
| if (attemptInfo.appAttemptId.equals("")) { | ||
| if(returnEmptyIfAttemptInfoNull) { | ||
| attemptInfo.appAttemptId | ||
| } else { | ||
| HistoryServer.UI_PATH_PREFIX + s"/${attemptInfo.id}" | ||
| } | ||
| } else { | ||
| HistoryServer.UI_PATH_PREFIX + s"/${attemptInfo.id}" + "_" + s"${attemptInfo.appAttemptId}" | ||
| } | ||
| } | ||
|
|
||
| private def firstAttemptRow(attemptInfo : ApplicationHistoryInfo) = { | ||
| val uiAddress = | ||
| if (attemptInfo.appAttemptId.equals("")) { | ||
| attemptInfo.appAttemptId | ||
| } else { | ||
| HistoryServer.UI_PATH_PREFIX + s"/${attemptInfo.id}" + "_" + s"${attemptInfo.appAttemptId}" | ||
| } | ||
|
|
||
| val startTime = UIUtils.formatDate(attemptInfo.startTime) | ||
| val endTime = UIUtils.formatDate(attemptInfo.endTime) | ||
| val duration = UIUtils.formatDuration(attemptInfo.endTime - attemptInfo.startTime) | ||
| val lastUpdated = UIUtils.formatDate(attemptInfo.lastUpdated) | ||
| val attemptId = attemptInfo.appAttemptId | ||
| <td><a href={uiAddress}>{attemptId}</a></td> | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: indentation
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does scala actually consider this line as a continuation of the previous line? The inline XML syntax has always looked super weird to me...
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it does. [In this case, even I took some time to figure that out.] |
||
| <td sorttable_customkey={attemptInfo.startTime.toString}>{startTime}</td> | ||
| <td sorttable_customkey={attemptInfo.endTime.toString}>{endTime}</td> | ||
| <td sorttable_customkey={(attemptInfo.endTime - attemptInfo.startTime).toString}> | ||
| {duration}</td> | ||
| <td>{attemptInfo.sparkUser}</td> | ||
| <td sorttable_customkey={attemptInfo.lastUpdated.toString}>{lastUpdated}</td> | ||
| } | ||
|
|
||
| private def attemptRow(attemptInfo: ApplicationHistoryInfo) = { | ||
| <tr> | ||
| {firstAttemptRow(attemptInfo)} | ||
| </tr> | ||
| } | ||
|
|
||
| private def appWithAttemptRow( | ||
| appAttemptsInfo: (String,ArrayBuffer[ApplicationHistoryInfo])): Seq[Node] = { | ||
| val applicationId = appAttemptsInfo._1 | ||
| val info = appAttemptsInfo._2 | ||
| val rowSpan = info.length | ||
| val rowSpanString = rowSpan.toString | ||
| val applicatioName = info(0).name | ||
| val lastAttemptURI = getAttemptURI(info(0), false) | ||
| val ttAttempts = info.slice(1, rowSpan -1) | ||
| val x = new xml.NodeBuffer | ||
| x += | ||
| <tr> | ||
| <td rowspan={rowSpanString}><a href={lastAttemptURI}>{applicationId}</a></td> | ||
| <td rowspan={rowSpanString}>{applicatioName}</td> | ||
| { firstAttemptRow(info(0)) } | ||
| </tr>; | ||
| for( i <- 1 until rowSpan ){ | ||
| x += attemptRow(info(i)) | ||
| } | ||
| x | ||
| } | ||
|
|
||
| private def makePageLink(linkPage: Int, showIncomplete: Boolean): String = { | ||
| "/?" + Array( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this need to be public?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just kept it along the same lines as applicationId. An application with a different attempt id is similar to a different application in many ways.