@@ -28,34 +28,24 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionIn
2828
2929
3030/**
31- * Facade to create the Scala-based or Java-based streams.
32- * Also, contains a reusable utility methods.
31+ * Helper object for creating Amazon Kinesis input streams.
3332 * :: Experimental ::
3433 */
3534@ Experimental
3635object KinesisUtils extends Logging {
3736 /**
3837 * Create an InputDStream that pulls messages from a Kinesis stream.
3938 *
40- * @param StreamingContext object
41- * @param appName Kinesis Application Name. Kinesis Apps are mapped to Kinesis Streams
42- * by the Kinesis Client Library. If you change the App name or Stream name,
43- * the KCL will throw errors.
44- * @param stream Kinesis Stream Name
45- * @param endpoint url of Kinesis service
46- * @param checkpoint interval (millis) for Kinesis checkpointing (not Spark checkpointing).
47- * See the Kinesis Spark Streaming documentation for more details on the different types
48- * of checkpoints.
49- * @param initialPositionInStream in the absence of Kinesis checkpoint info, this is the
39+ * @param ssc StreamingContext
40+ * @param appName unique name for your Kinesis app. Multiple instances of the app pull from
41+ * the same stream. The Kinesis Client Library coordinates all load-balancing and
42+ * failure-recovery.
43+ * @param stream Kinesis stream name
44+ * @param endpoint URL of the Kinesis service (e.g. https://kinesis.us-east-1.amazonaws.com)
45+ * Available endpoints: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region
46+ * @param checkpointIntervalMillis interval (millis) for Kinesis checkpointing
47+ * @param initialPositionInStream in the absence of Kinesis checkpoint info, this is the
5048 * worker's initial starting position in the stream.
51- * The values are either the beginning of the stream per Kinesis' limit of 24 hours
52- * (InitialPositionInStream.TRIM_HORIZON) or the tip of the stream
53- * (InitialPositionInStream.LATEST).
54- * The default is TRIM_HORIZON to avoid potential data loss. However, this presents the risk
55- * of processing records more than once.
56- * @param storageLevel The default is StorageLevel.MEMORY_AND_DISK_2 which replicates in-memory
57- * and on-disk to 2 nodes total (primary and secondary)
58- *
5949 * @return ReceiverInputDStream[Array[Byte]]
6050 */
6151 def createStream (
@@ -64,45 +54,34 @@ object KinesisUtils extends Logging {
6454 stream : String ,
6555 endpoint : String ,
6656 checkpointIntervalMillis : Long ,
67- initialPositionInStream : InitialPositionInStream ,
68- storageLevel : StorageLevel ): ReceiverInputDStream [Array [Byte ]] = {
57+ initialPositionInStream : InitialPositionInStream ): ReceiverInputDStream [Array [Byte ]] = {
6958 ssc.receiverStream(new KinesisReceiver (appName, stream, endpoint, checkpointIntervalMillis,
70- initialPositionInStream, storageLevel ))
59+ initialPositionInStream ))
7160 }
7261
7362 /**
7463 * Create a Java-friendly InputDStream that pulls messages from a Kinesis stream.
7564 *
76- * @param JavaStreamingContext object
77- * @param appName Kinesis Application Name. Kinesis Apps are mapped to Kinesis Streams
78- * by the Kinesis Client Library. If you change the App name or Stream name,
79- * the KCL will throw errors.
80- * @param stream Kinesis Stream Name
81- * @param endpoint url of Kinesis service
82- * @param checkpoint interval (millis) for Kinesis checkpointing (not Spark checkpointing).
83- * See the Kinesis Spark Streaming documentation for more details on the different types
84- * of checkpoints.
85- * @param initialPositionInStream in the absence of Kinesis checkpoint info, this is the
65+ * @param jssc Java StreamingContext object
66+ * @param appName unique name for your Kinesis app. Multiple instances of the app pull from
67+ * the same stream. The Kinesis Client Library coordinates all load-balancing and
68+ * failure-recovery.
69+ * @param stream Kinesis stream name
70+ * @param endpoint URL of the Kinesis service (e.g. https://kinesis.us-east-1.amazonaws.com)
71+ * Available endpoints: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region
72+ * @param checkpointIntervalMillis interval (millis) for Kinesis checkpointing
73+ * @param initialPositionInStream in the absence of Kinesis checkpoint info, this is the
8674 * worker's initial starting position in the stream.
87- * The values are either the beginning of the stream per Kinesis' limit of 24 hours
88- * (InitialPositionInStream.TRIM_HORIZON) or the tip of the stream
89- * (InitialPositionInStream.LATEST).
90- * The default is TRIM_HORIZON to avoid potential data loss. However, this presents the risk
91- * of processing records more than once.
92- * @param storageLevel The default is StorageLevel.MEMORY_AND_DISK_2 which replicates in-memory
93- * and on-disk to 2 nodes total (primary and secondary)
94- *
9575 * @return JavaReceiverInputDStream[Array[Byte]]
9676 */
// Java-friendly overload: accepts a JavaStreamingContext and returns a
// JavaReceiverInputDStream[Array[Byte]].
// NOTE(review): in this diff the '-' records carry a trailing storageLevel parameter that the
// '+' records drop; callers that still pass a StorageLevel will not match the new signature.
9777 def createStream (
9878 jssc : JavaStreamingContext ,
9979 appName : String ,
10080 stream : String ,
10181 endpoint : String ,
102- checkpointIntervalMillis : Long ,
103- initialPositionInStream : InitialPositionInStream ,
104- storageLevel : StorageLevel ): JavaReceiverInputDStream [Array [Byte ]] = {
82+ checkpointIntervalMillis : Long ,
83+ initialPositionInStream : InitialPositionInStream ): JavaReceiverInputDStream [Array [Byte ]] = {
// Build a KinesisReceiver from the arguments and hand it to jssc.receiverStream; the
// resulting stream is the method's return value.
10584 jssc.receiverStream(new KinesisReceiver (appName, stream, endpoint, checkpointIntervalMillis,
106- initialPositionInStream, storageLevel ))
85+ initialPositionInStream))
10786 }
10887}
0 commit comments