-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-37957][SQL] Correctly pass deterministic flag for V2 scalar functions #35243
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -240,6 +240,8 @@ object SerializerSupport { | |
| * without invoking the function. | ||
| * @param returnNullable When false, indicating the invoked method will always return | ||
| * non-null value. | ||
| * @param isDeterministic Whether the method invocation is deterministic or not. If false, Spark | ||
| * will not apply certain optimizations such as constant folding. | ||
| */ | ||
| case class StaticInvoke( | ||
| staticObject: Class[_], | ||
|
|
@@ -248,7 +250,8 @@ case class StaticInvoke( | |
| arguments: Seq[Expression] = Nil, | ||
| inputTypes: Seq[AbstractDataType] = Nil, | ||
| propagateNull: Boolean = true, | ||
| returnNullable: Boolean = true) extends InvokeLike { | ||
| returnNullable: Boolean = true, | ||
| isDeterministic: Boolean = true) extends InvokeLike { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this might be controversial to backport .. as
While I agree with this being merged in 3.3.0, and I don't feel strongly on this in 3.2.X, maybe we can consider reverting this out of I will leave it to you @sunchao.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is a bug fix. A V2 catalog can return a function that's non-deterministic, while without the fix Spark can treat it as deterministic and apply related optimization rules (e.g., constant folding), which could cause correctness issues. Since this is already in Spark 3.2.1, I don't see much benefit of reverting it and re-introduce the correctness issue.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 on keeping this in 3.2.x. This fixed the correctness issue and we actually intentionally included this fix in 3.2.1 release.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's fine. I didn't have a strong preference so I am okay with keeping it either 👍 . |
||
|
|
||
| val objectName = staticObject.getName.stripSuffix("$") | ||
| val cls = if (staticObject.getName == objectName) { | ||
|
|
@@ -259,6 +262,7 @@ case class StaticInvoke( | |
|
|
||
| override def nullable: Boolean = needNullCheck || returnNullable | ||
| override def children: Seq[Expression] = arguments | ||
| override lazy val deterministic: Boolean = isDeterministic && arguments.forall(_.deterministic) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: seems we can move this to
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought about this too, but then we'd have to move |
||
|
|
||
| lazy val argClasses = ScalaReflection.expressionJavaClasses(arguments) | ||
| @transient lazy val method = findMethod(cls, functionName, argClasses) | ||
|
|
@@ -340,6 +344,8 @@ case class StaticInvoke( | |
| * without invoking the function. | ||
| * @param returnNullable When false, indicating the invoked method will always return | ||
| * non-null value. | ||
| * @param isDeterministic Whether the method invocation is deterministic or not. If false, Spark | ||
| * will not apply certain optimizations such as constant folding. | ||
| */ | ||
| case class Invoke( | ||
| targetObject: Expression, | ||
|
|
@@ -348,12 +354,14 @@ case class Invoke( | |
| arguments: Seq[Expression] = Nil, | ||
| methodInputTypes: Seq[AbstractDataType] = Nil, | ||
| propagateNull: Boolean = true, | ||
| returnNullable : Boolean = true) extends InvokeLike { | ||
| returnNullable : Boolean = true, | ||
| isDeterministic: Boolean = true) extends InvokeLike { | ||
|
|
||
| lazy val argClasses = ScalaReflection.expressionJavaClasses(arguments) | ||
|
|
||
| override def nullable: Boolean = targetObject.nullable || needNullCheck || returnNullable | ||
| override def children: Seq[Expression] = targetObject +: arguments | ||
| override lazy val deterministic: Boolean = isDeterministic && arguments.forall(_.deterministic) | ||
| override def inputTypes: Seq[AbstractDataType] = | ||
| if (methodInputTypes.nonEmpty) { | ||
| Seq(targetObject.dataType) ++ methodInputTypes | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package test.org.apache.spark.sql.connector.catalog.functions; | ||
|
|
||
| import java.util.Random; | ||
|
|
||
| import org.apache.spark.sql.catalyst.InternalRow; | ||
| import org.apache.spark.sql.connector.catalog.functions.BoundFunction; | ||
| import org.apache.spark.sql.connector.catalog.functions.ScalarFunction; | ||
| import org.apache.spark.sql.connector.catalog.functions.UnboundFunction; | ||
| import org.apache.spark.sql.types.DataType; | ||
| import org.apache.spark.sql.types.DataTypes; | ||
| import org.apache.spark.sql.types.IntegerType; | ||
| import org.apache.spark.sql.types.StructType; | ||
|
|
||
| /** | ||
| * Test V2 function which add a random number to the input integer. | ||
| */ | ||
| public class JavaRandomAdd implements UnboundFunction { | ||
| private final BoundFunction fn; | ||
|
|
||
| public JavaRandomAdd(BoundFunction fn) { | ||
| this.fn = fn; | ||
| } | ||
|
|
||
| @Override | ||
| public String name() { | ||
| return "rand"; | ||
| } | ||
|
|
||
| @Override | ||
| public BoundFunction bind(StructType inputType) { | ||
| if (inputType.fields().length != 1) { | ||
| throw new UnsupportedOperationException("Expect exactly one argument"); | ||
| } | ||
| if (inputType.fields()[0].dataType() instanceof IntegerType) { | ||
| return fn; | ||
| } | ||
| throw new UnsupportedOperationException("Expect IntegerType"); | ||
| } | ||
|
|
||
| @Override | ||
| public String description() { | ||
| return "rand_add: add a random integer to the input\n" + | ||
| "rand_add(int) -> int"; | ||
| } | ||
|
|
||
| public abstract static class JavaRandomAddBase implements ScalarFunction<Integer> { | ||
| @Override | ||
| public DataType[] inputTypes() { | ||
| return new DataType[] { DataTypes.IntegerType }; | ||
| } | ||
|
|
||
| @Override | ||
| public DataType resultType() { | ||
| return DataTypes.IntegerType; | ||
| } | ||
|
|
||
| @Override | ||
| public String name() { | ||
| return "rand_add"; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean isDeterministic() { | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| public static class JavaRandomAddDefault extends JavaRandomAddBase { | ||
| private final Random rand = new Random(); | ||
|
|
||
| @Override | ||
| public Integer produceResult(InternalRow input) { | ||
| return input.getInt(0) + rand.nextInt(); | ||
| } | ||
| } | ||
|
|
||
| public static class JavaRandomAddMagic extends JavaRandomAddBase { | ||
| private final Random rand = new Random(); | ||
|
|
||
| public int invoke(int input) { | ||
| return input + rand.nextInt(); | ||
| } | ||
| } | ||
|
|
||
| public static class JavaRandomAddStaticMagic extends JavaRandomAddBase { | ||
| private static final Random rand = new Random(); | ||
|
|
||
| public static int invoke(int input) { | ||
| return input + rand.nextInt(); | ||
| } | ||
| } | ||
| } | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why this defaults to true?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think in majority cases a function is deterministic, so defaulting to true here. This is similar to how we treat
propagateNullandreturnNullablehere.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 for default true.