-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-33259][SS] Disable streaming query with possible correctness issue by default #30210
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression | |
| import org.apache.spark.sql.catalyst.plans._ | ||
| import org.apache.spark.sql.catalyst.plans.logical._ | ||
| import org.apache.spark.sql.catalyst.streaming.InternalOutputModes | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.streaming.OutputMode | ||
|
|
||
| /** | ||
|
|
@@ -40,10 +41,15 @@ object UnsupportedOperationChecker extends Logging { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Checks for possible correctness issues in chained stateful operators. The behavior is | ||
| * controlled by SQL config `spark.sql.streaming.statefulOperator.correctnessCheck`. When it | ||
| * is enabled, an analysis exception will be thrown. Otherwise, Spark will just print a warning | ||
| * message, which was the behavior before Spark 3.1.0. | ||
|
||
| */ | ||
| def checkStreamingQueryGlobalWatermarkLimit( | ||
| plan: LogicalPlan, | ||
| outputMode: OutputMode, | ||
| failWhenDetected: Boolean): Unit = { | ||
| outputMode: OutputMode): Unit = { | ||
| def isStatefulOperationPossiblyEmitLateRows(p: LogicalPlan): Boolean = p match { | ||
| case s: Aggregate | ||
| if s.isStreaming && outputMode == InternalOutputModes.Append => true | ||
|
|
@@ -62,6 +68,8 @@ object UnsupportedOperationChecker extends Logging { | |
| case _ => false | ||
| } | ||
|
|
||
| val failWhenDetected = SQLConf.get.statefulOperatorCorrectnessCheckEnabled | ||
|
|
||
| try { | ||
| plan.foreach { subPlan => | ||
| if (isStatefulOperation(subPlan)) { | ||
|
|
@@ -73,7 +81,10 @@ object UnsupportedOperationChecker extends Logging { | |
| "The query contains stateful operation which can emit rows older than " + | ||
| "the current watermark plus allowed late record delay, which are \"late rows\"" + | ||
| " in downstream stateful operations and these rows can be discarded. " + | ||
| "Please refer the programming guide doc for more details." | ||
| "Please refer the programming guide doc for more details. If you understand " + | ||
| "the possible risk of correctness issue and still need to run the query, " + | ||
| "you can disable this check by setting the config " + | ||
| "`spark.sql.streaming.statefulOperator.correctnessCheck` to false." | ||
| throwError(errorMsg)(plan) | ||
| } | ||
| } | ||
|
|
@@ -388,7 +399,7 @@ object UnsupportedOperationChecker extends Logging { | |
| checkUnsupportedExpressions(subPlan) | ||
| } | ||
|
|
||
| checkStreamingQueryGlobalWatermarkLimit(plan, outputMode, failWhenDetected = false) | ||
| checkStreamingQueryGlobalWatermarkLimit(plan, outputMode) | ||
| } | ||
|
|
||
| def checkForContinuous(plan: LogicalPlan, outputMode: OutputMode): Unit = { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1382,6 +1382,21 @@ object SQLConf { | |
| .booleanConf | ||
| .createWithDefault(true) | ||
|
|
||
| val STATEFUL_OPERATOR_CORRECTNESS_CHECK_ENABLED = | ||
| buildConf("spark.sql.streaming.statefulOperator.correctnessCheck") | ||
|
||
| .internal() | ||
| .doc("When true, the stateful operators for streaming query will be checked for possible " + | ||
| "correctness issue due to global watermark. The correctness issue comes from queries " + | ||
| "containing stateful operation which can emit rows older than the current watermark " + | ||
| "plus allowed late record delay, which are \"late rows\" in downstream stateful " + | ||
| "operations and these rows can be discarded. Please refer the programming guide doc for " + | ||
| "more details. Once the issue is detected, Spark will throw analysis exception. " + | ||
| "When this config is disabled, Spark will just print warning message for users. " + | ||
| "Prior to Spark 3.1.0, the behavior is disabling this config.") | ||
| .version("3.1.0") | ||
| .booleanConf | ||
| .createWithDefault(true) | ||
|
|
||
| val VARIABLE_SUBSTITUTE_ENABLED = | ||
| buildConf("spark.sql.variable.substitute") | ||
| .doc("This enables substitution using syntax like `${var}`, `${system:var}`, " + | ||
|
|
@@ -3028,6 +3043,9 @@ class SQLConf extends Serializable with Logging { | |
|
|
||
| def isUnsupportedOperationCheckEnabled: Boolean = getConf(UNSUPPORTED_OPERATION_CHECK_ENABLED) | ||
|
|
||
| def statefulOperatorCorrectnessCheckEnabled: Boolean = | ||
| getConf(STATEFUL_OPERATOR_CORRECTNESS_CHECK_ENABLED) | ||
|
|
||
| def streamingFileCommitProtocolClass: String = getConf(STREAMING_FILE_COMMIT_PROTOCOL_CLASS) | ||
|
|
||
| def fileSinkLogDeletion: Boolean = getConf(FILE_SINK_LOG_DELETION) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1324,7 +1324,9 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { | |
| def testWithAllStateVersions(name: String)(func: => Unit): Unit = { | ||
| for (version <- FlatMapGroupsWithStateExecHelper.supportedVersions) { | ||
| test(s"$name - state format version $version") { | ||
| withSQLConf(SQLConf.FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION.key -> version.toString) { | ||
| withSQLConf( | ||
| SQLConf.FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION.key -> version.toString, | ||
| SQLConf.STATEFUL_OPERATOR_CORRECTNESS_CHECK_ENABLED.key -> "false") { | ||
|
||
| func | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: remove `"`