-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-53333][SS] Enable StateDataSource with state checkpoint v2 (only readChangeFeed) #52148
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
c0aeeea
f96a9d5
c270c63
2c342b1
839251c
7024ad8
e8682cf
1d7420e
4804b54
0f603ab
e5027bc
bada482
41d9301
72000d5
e3e4591
8357be5
dc8ead3
c67f06c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -430,14 +430,14 @@ trait StateStoreWriter | |
| * Set the SQL metrics related to the state store. | ||
| * This should be called in that task after the store has been updated. | ||
| */ | ||
| protected def setStoreMetrics(store: StateStore): Unit = { | ||
| protected def setStoreMetrics(store: StateStore, setCheckpointInfo: Boolean = true): Unit = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm why do we need this change ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In |
||
| val storeMetrics = store.metrics | ||
| longMetric("numTotalStateRows") += storeMetrics.numKeys | ||
| longMetric("stateMemory") += storeMetrics.memoryUsedBytes | ||
| setStoreCustomMetrics(storeMetrics.customMetrics) | ||
| setStoreInstanceMetrics(storeMetrics.instanceMetrics) | ||
|
|
||
| if (StatefulOperatorStateInfo.enableStateStoreCheckpointIds(conf)) { | ||
| if (StatefulOperatorStateInfo.enableStateStoreCheckpointIds(conf) && setCheckpointInfo) { | ||
| // Set the state store checkpoint information for the driver to collect | ||
| val ssInfo = store.getStateStoreCheckpointInfo() | ||
| setStateStoreCheckpointInfo( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1064,8 +1064,15 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with | |
| override def getStateStoreChangeDataReader( | ||
| startVersion: Long, | ||
| endVersion: Long, | ||
| colFamilyNameOpt: Option[String] = None): | ||
| colFamilyNameOpt: Option[String] = None, | ||
| endVersionStateStoreCkptId: Option[String] = None): | ||
| StateStoreChangeDataReader = { | ||
|
|
||
| if (endVersionStateStoreCkptId.isDefined) { | ||
| throw QueryExecutionErrors.cannotLoadStore(new SparkException( | ||
|
||
| "HDFSBackedStateStoreProvider does not support endVersionStateStoreCkptId")) | ||
| } | ||
|
|
||
| // Multiple column families are not supported with HDFSBackedStateStoreProvider | ||
| if (colFamilyNameOpt.isDefined) { | ||
| throw StateStoreErrors.multipleColumnFamiliesNotSupported(providerName) | ||
|
|
@@ -1099,7 +1106,7 @@ class HDFSBackedStateStoreChangeDataReader( | |
| extends StateStoreChangeDataReader( | ||
| fm, stateLocation, startVersion, endVersion, compressionCodec) { | ||
|
|
||
| override protected var changelogSuffix: String = "delta" | ||
| override protected val changelogSuffix: String = "delta" | ||
|
|
||
| override def getNext(): (RecordType.Value, UnsafeRow, UnsafeRow, Long) = { | ||
| val reader = currentChangelogReader() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -342,6 +342,51 @@ class RocksDB( | |
| currLineage | ||
| } | ||
|
|
||
| /** | ||
| * Construct the full lineage from startVersion to endVersion (inclusive) by | ||
| * walking backwards using lineage information embedded in changelog files. | ||
| */ | ||
| def getFullLineage( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we add some unit tests for this new function? The logic seems quite complicated, want to make sure we can test all edge cases. Particularly the error cases.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added |
||
| startVersion: Long, | ||
| endVersion: Long, | ||
| endVersionStateStoreCkptId: Option[String]): Array[LineageItem] = { | ||
| assert(startVersion <= endVersion, | ||
| s"startVersion $startVersion should be less than or equal to endVersion $endVersion") | ||
|
|
||
| // A buffer to collect the lineage information, the entries should be decreasing in version | ||
| val buf = mutable.ArrayBuffer[LineageItem]() | ||
| buf.append(LineageItem(endVersion, endVersionStateStoreCkptId.get)) | ||
|
|
||
| while (buf.last.version > startVersion) { | ||
| val prevSmallestVersion = buf.last.version | ||
| val lineage = getLineageFromChangelogFile(buf.last.version, Some(buf.last.checkpointUniqueId)) | ||
| // lineage array is sorted in increasing order, we need to reverse it | ||
| val lineageSorted = lineage.filter(_.version >= startVersion).sortBy(_.version).reverse | ||
|
||
| // append to the buffer in reverse order, so the buffer is always decreasing in version | ||
| buf.appendAll(lineageSorted) | ||
|
|
||
| // to prevent infinite loop if we make no progress, throw an exception | ||
| if (buf.last.version == prevSmallestVersion) { | ||
| throw new IllegalStateException(s"Lineage is not complete") | ||
|
||
| } | ||
| } | ||
|
|
||
| // we return the lineage in increasing order | ||
| val ret = buf.reverse.toArray | ||
|
|
||
| // Sanity checks | ||
| assert(ret.head.version == startVersion, | ||
| s"Expected first lineage version to be $startVersion, but got ${ret.head.version}") | ||
| assert(ret.last.version == endVersion, | ||
| s"Expected last lineage version to be $endVersion, but got ${ret.last.version}") | ||
| // Assert that the lineage array is strictly increasing in version | ||
| assert(ret.sliding(2).forall { | ||
|
||
| case Array(prev, next) => prev.version + 1 == next.version | ||
| case _ => true | ||
| }, s"Lineage array is not strictly increasing in version") | ||
|
|
||
| ret | ||
| } | ||
|
|
||
| /** | ||
| * Load the given version of data in a native RocksDB instance. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -877,15 +877,18 @@ private[sql] class RocksDBStateStoreProvider | |
| override def getStateStoreChangeDataReader( | ||
| startVersion: Long, | ||
| endVersion: Long, | ||
| colFamilyNameOpt: Option[String] = None): | ||
| colFamilyNameOpt: Option[String] = None, | ||
| endVersionStateStoreCkptId: Option[String] = None): | ||
| StateStoreChangeDataReader = { | ||
| val statePath = stateStoreId.storeCheckpointLocation() | ||
| val sparkConf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf) | ||
| new RocksDBStateStoreChangeDataReader( | ||
| CheckpointFileManager.create(statePath, hadoopConf), | ||
| rocksDB, | ||
| statePath, | ||
| startVersion, | ||
| endVersion, | ||
| endVersionStateStoreCkptId, | ||
| CompressionCodec.createCodec(sparkConf, storeConf.compressionCodec), | ||
| keyValueEncoderMap, | ||
| colFamilyNameOpt) | ||
|
|
@@ -1224,17 +1227,32 @@ object RocksDBStateStoreProvider { | |
| /** [[StateStoreChangeDataReader]] implementation for [[RocksDBStateStoreProvider]] */ | ||
| class RocksDBStateStoreChangeDataReader( | ||
| fm: CheckpointFileManager, | ||
| rocksDB: RocksDB, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm, seems a little strange to me that we are passing in RocksDB here in its entirety just so we can use getFullLineage. Is there a way to abstract out the getFullLineage functionality so we can reuse it a different way?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I initially had done some refactoring to refactor all the lineage related methods to RocksDBFileManager and only pass that in here. I did not do it in this PR just to reduce the amount of changes in this PR. At a glance, all the lineage related methods ( I am not sure if we want to do this refactoring in the PR.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure that's fine. We don't have to do it in this PR |
||
| stateLocation: Path, | ||
| startVersion: Long, | ||
| endVersion: Long, | ||
| endVersionStateStoreCkptId: Option[String], | ||
| compressionCodec: CompressionCodec, | ||
| keyValueEncoderMap: | ||
| ConcurrentHashMap[String, (RocksDBKeyStateEncoder, RocksDBValueStateEncoder, Short)], | ||
| colFamilyNameOpt: Option[String] = None) | ||
| extends StateStoreChangeDataReader( | ||
| fm, stateLocation, startVersion, endVersion, compressionCodec, colFamilyNameOpt) { | ||
|
|
||
| override protected var changelogSuffix: String = "changelog" | ||
| override protected val versionsAndUniqueIds: Array[(Long, Option[String])] = | ||
| if (endVersionStateStoreCkptId.isDefined) { | ||
| val fullVersionLineage = rocksDB.getFullLineage( | ||
| startVersion, | ||
| endVersion, | ||
| endVersionStateStoreCkptId) | ||
| fullVersionLineage | ||
| .sortBy(_.version) | ||
| .map(item => (item.version, Some(item.checkpointUniqueId))) | ||
| } else { | ||
| (startVersion to endVersion).map((_, None)).toArray | ||
| } | ||
|
|
||
| override protected val changelogSuffix: String = "changelog" | ||
|
|
||
| override def getNext(): (RecordType.Value, UnsafeRow, UnsafeRow, Long) = { | ||
| var currRecord: (RecordType.Value, Array[Byte], Array[Byte]) = null | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -447,6 +447,7 @@ abstract class StateStoreChangelogReader( | |
| Serialization.read[Array[LineageItem]](lineageStr) | ||
| } | ||
|
|
||
| // The array contains lineage information from [snapShotVersion, version] | ||
|
||
| lazy val lineage: Array[LineageItem] = readLineage() | ||
|
|
||
| def version: Short | ||
|
|
@@ -632,27 +633,41 @@ abstract class StateStoreChangeDataReader( | |
| * Iterator that iterates over the changelog files in the state store. | ||
| */ | ||
| private class ChangeLogFileIterator extends Iterator[Path] { | ||
| val versionsAndUniqueIds: Iterator[(Long, Option[String])] = | ||
| StateStoreChangeDataReader.this.versionsAndUniqueIds.iterator | ||
|
|
||
| private var currentVersion = StateStoreChangeDataReader.this.startVersion - 1 | ||
| private var currentUniqueId: Option[String] = None | ||
|
|
||
| /** returns the version of the changelog returned by the latest [[next]] function call */ | ||
| def getVersion: Long = currentVersion | ||
|
|
||
| override def hasNext: Boolean = currentVersion < StateStoreChangeDataReader.this.endVersion | ||
| override def hasNext: Boolean = versionsAndUniqueIds.hasNext | ||
|
|
||
| override def next(): Path = { | ||
| currentVersion += 1 | ||
| getChangelogPath(currentVersion) | ||
| val nextTuple = versionsAndUniqueIds.next() | ||
| currentVersion = nextTuple._1 | ||
| currentUniqueId = nextTuple._2 | ||
| getChangelogPath(currentVersion, currentUniqueId) | ||
| } | ||
|
|
||
| private def getChangelogPath(version: Long): Path = | ||
| new Path( | ||
| StateStoreChangeDataReader.this.stateLocation, | ||
| s"$version.${StateStoreChangeDataReader.this.changelogSuffix}") | ||
| private def getChangelogPath(version: Long, checkpointUniqueId: Option[String]): Path = | ||
| if (checkpointUniqueId.isDefined) { | ||
| new Path( | ||
| StateStoreChangeDataReader.this.stateLocation, | ||
| s"${version}_${checkpointUniqueId.get}." + | ||
| s"${StateStoreChangeDataReader.this.changelogSuffix}") | ||
| } else { | ||
| new Path( | ||
| StateStoreChangeDataReader.this.stateLocation, | ||
| s"$version.${StateStoreChangeDataReader.this.changelogSuffix}") | ||
| } | ||
| } | ||
|
|
||
| /** file format of the changelog files */ | ||
| protected var changelogSuffix: String | ||
| protected val changelogSuffix: String | ||
| protected val versionsAndUniqueIds: Array[(Long, Option[String])] = | ||
| (startVersion to endVersion).map((_, None)).toArray | ||
| private lazy val fileIterator = new ChangeLogFileIterator | ||
| private var changelogReader: StateStoreChangelogReader = null | ||
|
|
||
|
|
@@ -671,11 +686,10 @@ abstract class StateStoreChangeDataReader( | |
| return null | ||
| } | ||
|
|
||
| changelogReader = if (colFamilyNameOpt.isDefined) { | ||
| new StateStoreChangelogReaderV2(fm, fileIterator.next(), compressionCodec) | ||
| } else { | ||
| new StateStoreChangelogReaderV1(fm, fileIterator.next(), compressionCodec) | ||
| } | ||
| val changelogFile = fileIterator.next() | ||
| changelogReader = | ||
| new StateStoreChangelogReaderFactory(fm, changelogFile, compressionCodec) | ||
| .constructChangelogReader() | ||
| } | ||
| changelogReader | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to confirm - this is backed by an error class correct ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes this is. But I changed it to be backed by a new error class
STDS_MIXED_CHECKPOINT_FORMAT_VERSIONS_NOT_SUPPORTED