-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-19304] [Streaming] [Kinesis] fix kinesis slow checkpoint recovery #16842
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
b5e544a
d3b8c62
274ee27
4f5edd3
82499bc
c8efdcf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -36,7 +36,8 @@ import org.apache.spark.util.NextIterator | |
| /** Class representing a range of Kinesis sequence numbers. Both sequence numbers are inclusive. */ | ||
| private[kinesis] | ||
| case class SequenceNumberRange( | ||
| streamName: String, shardId: String, fromSeqNumber: String, toSeqNumber: String) | ||
| streamName: String, shardId: String, fromSeqNumber: String, toSeqNumber: String, | ||
| recordCount: Int) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this a property of a range -- or when would it not equal (from - to + 1)?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure of a better place to put.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, but is it an 'input' or 'output'? the usage below makes it look like the caller dictates how many records are in the range, but it doesn't know that ahead of time? I probably misunderstand this.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. its an input to spark checkpoint metadata. On streaming KinesisReceiver receives records creates blocks & knows about seqNumber, count. When recovering from checkpoint we read back this information from checkpoint and make aws kinesis getRecords call with fromSeqNumber & limit
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm worried this change will break checkpoint recovery, because we use Java serialization, and be a barrier to users from upgrading.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure on upgrading, since for code upgrade we need to delete the checkpoint directory and start afresh. I did run this patch and was able to serialize the limit into checkpoint, ( not a scala pro though) |
||
|
|
||
| /** Class representing an array of Kinesis sequence number ranges */ | ||
| private[kinesis] | ||
|
|
@@ -138,6 +139,8 @@ class KinesisSequenceRangeIterator( | |
| private val client = new AmazonKinesisClient(credentials) | ||
| private val streamName = range.streamName | ||
| private val shardId = range.shardId | ||
| // AWS limits to maximum of 10k records per get call | ||
| private val maxGetRecordsLimit = 10000 | ||
|
|
||
| private var toSeqNumberReceived = false | ||
| private var lastSeqNumber: String = null | ||
|
|
@@ -155,12 +158,14 @@ class KinesisSequenceRangeIterator( | |
|
|
||
| // If the internal iterator has not been initialized, | ||
| // then fetch records from starting sequence number | ||
| internalIterator = getRecords(ShardIteratorType.AT_SEQUENCE_NUMBER, range.fromSeqNumber) | ||
| internalIterator = getRecords(ShardIteratorType.AT_SEQUENCE_NUMBER, range.fromSeqNumber, | ||
| range.recordCount) | ||
| } else if (!internalIterator.hasNext) { | ||
|
|
||
| // If the internal iterator does not have any more records, | ||
| // then fetch more records after the last consumed sequence number | ||
| internalIterator = getRecords(ShardIteratorType.AFTER_SEQUENCE_NUMBER, lastSeqNumber) | ||
| internalIterator = getRecords(ShardIteratorType.AFTER_SEQUENCE_NUMBER, lastSeqNumber, | ||
| range.recordCount) | ||
| } | ||
|
|
||
| if (!internalIterator.hasNext) { | ||
|
|
@@ -193,9 +198,10 @@ class KinesisSequenceRangeIterator( | |
| /** | ||
| * Get records starting from or after the given sequence number. | ||
| */ | ||
| private def getRecords(iteratorType: ShardIteratorType, seqNum: String): Iterator[Record] = { | ||
| private def getRecords(iteratorType: ShardIteratorType, seqNum: String, | ||
|
||
| recordCount: Int): Iterator[Record] = { | ||
| val shardIterator = getKinesisIterator(iteratorType, seqNum) | ||
| val result = getRecordsAndNextKinesisIterator(shardIterator) | ||
| val result = getRecordsAndNextKinesisIterator(shardIterator, recordCount) | ||
| result._1 | ||
| } | ||
|
|
||
|
|
@@ -204,10 +210,11 @@ class KinesisSequenceRangeIterator( | |
| * to get records from Kinesis), and get the next shard iterator for next consumption. | ||
| */ | ||
| private def getRecordsAndNextKinesisIterator( | ||
| shardIterator: String): (Iterator[Record], String) = { | ||
| shardIterator: String, recordCount: Int): (Iterator[Record], String) = { | ||
|
||
| val getRecordsRequest = new GetRecordsRequest | ||
| getRecordsRequest.setRequestCredentials(credentials) | ||
| getRecordsRequest.setShardIterator(shardIterator) | ||
| getRecordsRequest.setLimit(Math.min(recordCount, this.maxGetRecordsLimit)) | ||
| val getRecordsResult = retryOrTimeout[GetRecordsResult]( | ||
| s"getting records using shard iterator") { | ||
| client.getRecords(getRecordsRequest) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
one parameter per line: