Skip to content

Commit 239c86a

Browse files
committed
Produce uncorrelated IN for Contains with nullable item (#32575)
Fixes #32574 (cherry picked from commit 7b712f8)
1 parent c153c95 commit 239c86a

31 files changed

Lines changed: 1228 additions & 589 deletions

File tree

src/EFCore.Relational/Query/SqlNullabilityProcessor.cs

Lines changed: 178 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ public class SqlNullabilityProcessor
2727
private static readonly bool UseOldBehavior32208 =
2828
AppContext.TryGetSwitch("Microsoft.EntityFrameworkCore.Issue32208", out var enabled32208) && enabled32208;
2929

30+
private static readonly bool UseOldBehavior32574 =
31+
AppContext.TryGetSwitch("Microsoft.EntityFrameworkCore.Issue32574", out var enabled32574) && enabled32574;
32+
3033
/// <summary>
3134
/// Creates a new instance of the <see cref="SqlNullabilityProcessor" /> class.
3235
/// </summary>
@@ -706,7 +709,7 @@ protected virtual SqlExpression VisitIn(InExpression inExpression, bool allowOpt
706709
return inExpression;
707710

708711
case (true, false):
709-
{
712+
NullableItemWithNonNullableProjection:
710713
// If the item is actually null (not just nullable) and the projection is non-nullable, just return false immediately:
711714
// WHERE NULL IN (SELECT NonNullable FROM foo) -> false
712715
if (IsNull(item))
@@ -721,7 +724,6 @@ protected virtual SqlExpression VisitIn(InExpression inExpression, bool allowOpt
721724
return allowOptimizedExpansion
722725
? inExpression
723726
: _sqlExpressionFactory.AndAlso(inExpression, _sqlExpressionFactory.IsNotNull(item));
724-
}
725727

726728
case (false, true):
727729
{
@@ -734,6 +736,14 @@ protected virtual SqlExpression VisitIn(InExpression inExpression, bool allowOpt
734736
return inExpression;
735737
}
736738

739+
// If the subquery happens to be a primitive collection (e.g. OPENJSON), pull out the null values from the parameter.
740+
// Since the item is non-nullable, it can never match those null values, and all they do is cause the IN expression
741+
// to return NULL if the item isn't found. So just remove them.
742+
if (!UseOldBehavior32574 && TryMakeNonNullable(subquery, out var nonNullableSubquery, out _))
743+
{
744+
return inExpression.Update(item, nonNullableSubquery);
745+
}
746+
737747
// On SQL Server, EXISTS isn't less efficient than IN, and the additional COALESCE (and CASE/WHEN which it requires)
738748
// add unneeded clutter (and possibly hurt perf). So allow providers to prefer EXISTS.
739749
if (PreferExistsToInWithCoalesce)
@@ -745,13 +755,46 @@ protected virtual SqlExpression VisitIn(InExpression inExpression, bool allowOpt
745755
}
746756

747757
case (true, true):
748-
TransformToExists:
749-
// Worst case: both sides are nullable; there's no way to distinguish the item was found or not.
750-
// We rewrite to an EXISTS subquery where we can generate a precise predicate to check for what we need. Note that this
751-
// performs (significantly) worse than an IN expression, since it involves a correlated subquery.
758+
// Worst case: both sides are nullable; that means that with IN, there's no way to distinguish between:
759+
// a) The item was NULL and was found (e.g. NULL IN (1, 2, NULL) should yield true), and
760+
// b) The item wasn't found (e.g. 3 IN (1, 2, NULL) should yield false)
761+
762+
// As a last resort, we can rewrite to an EXISTS subquery where we can generate a precise predicate to check for what we
763+
// need. This unfortunately performs (significantly) worse than an IN expression, since it involves a correlated
764+
// subquery, and can cause indexes to not get used.
765+
766+
// But before doing this, we check whether the subquery represents a simple parameterized collection (e.g. a bare
767+
// OPENJSON call over a parameter in SQL Server), and if it is, rewrite the parameter to remove nulls so we can keep
768+
// using IN.
769+
if (!UseOldBehavior32574 && TryMakeNonNullable(subquery, out var nonNullableSubquery2, out var foundNull))
770+
{
771+
inExpression = inExpression.Update(item, nonNullableSubquery2);
752772

753-
// We'll need to mutate the subquery to introduce the predicate inside it, but it might be referenced by other places in
754-
// the tree, so we create a copy to work on.
773+
if (!foundNull.Value)
774+
{
775+
// There weren't any actual nulls inside the parameterized collection - we can jump to the case which handles
776+
// that.
777+
goto NullableItemWithNonNullableProjection;
778+
}
779+
780+
// Nulls were found inside the parameterized collection, and removed. If the item is a null constant, just convert
781+
// the whole thing to true.
782+
if (IsNull(item))
783+
{
784+
return _sqlExpressionFactory.Constant(true, inExpression.TypeMapping);
785+
}
786+
787+
// Otherwise we now need to compensate for the removed nulls outside, by adding OR item IS NULL.
788+
// Note that this is safe in negated (non-optimized) contexts:
789+
// WHERE item NOT IN ("foo", "bar") AND item IS NOT NULL
790+
// When item is NULL, the item IS NOT NULL clause causes the whole thing to return false. Otherwise that clause
791+
// can be ignored, and we have non-null item IN non-null list-of-values.
792+
return _sqlExpressionFactory.OrElse(inExpression, _sqlExpressionFactory.IsNull(item));
793+
}
794+
795+
TransformToExists:
796+
// We unfortunately need to rewrite to EXISTS. We'll need to mutate the subquery to introduce the predicate inside it,
797+
// but it might be referenced by other places in the tree, so we create a copy to work on.
755798

756799
// No need for a projection with EXISTS, clear it to get SELECT 1
757800
subquery = subquery.Update(
@@ -1960,6 +2003,133 @@ static bool TryNegate(ExpressionType expressionType, out ExpressionType result)
19602003
}
19612004
}
19622005

2006+
/// <summary>
2007+
/// Attempts to convert the given <paramref name="selectExpression" />, which has a nullable projection, to an identical expression
2008+
/// which does not have a nullable projection. This is used to extract NULLs out of e.g. the parameter argument of SQL Server
2009+
/// OPENJSON, in order to allow a more efficient translation.
2010+
/// </summary>
2011+
[EntityFrameworkInternal]
2012+
protected virtual bool TryMakeNonNullable(
2013+
SelectExpression selectExpression,
2014+
[NotNullWhen(true)] out SelectExpression? rewrittenSelectExpression,
2015+
[NotNullWhen(true)] out bool? foundNull)
2016+
{
2017+
if (selectExpression is
2018+
{
2019+
Tables: [var collectionTable],
2020+
GroupBy: [],
2021+
Having: null,
2022+
Limit: null,
2023+
Offset: null,
2024+
Predicate: null,
2025+
// Note that a orderings and distinct are OK - they don't interact with our null removal.
2026+
// We exclude the predicate since it may actually filter out nulls
2027+
Projection: [{ Expression: ColumnExpression projectedColumn }] projection
2028+
}
2029+
&& projectedColumn.Table == collectionTable
2030+
&& IsCollectionTable(collectionTable, out var collection)
2031+
&& collection is SqlParameterExpression collectionParameter
2032+
&& ParameterValues[collectionParameter.Name] is IList values)
2033+
{
2034+
// We're looking at a parameter beyond its simple nullability, so we can't use the 2nd-level cache for this query.
2035+
DoNotCache();
2036+
2037+
IList? processedValues = null;
2038+
2039+
for (var i = 0; i < values.Count; i++)
2040+
{
2041+
var value = values[i];
2042+
2043+
if (value is null)
2044+
{
2045+
if (processedValues is null)
2046+
{
2047+
var elementClrType = values.GetType().GetSequenceType();
2048+
processedValues = (IList)Activator.CreateInstance(typeof(List<>).MakeGenericType(elementClrType), values.Count)!;
2049+
for (var j = 0; j < i; j++)
2050+
{
2051+
processedValues.Add(values[j]!);
2052+
}
2053+
}
2054+
2055+
// Skip the value
2056+
continue;
2057+
}
2058+
2059+
processedValues?.Add(value);
2060+
}
2061+
2062+
if (processedValues is null)
2063+
{
2064+
// No null was found in the parameter's elements - the select expression is already non-nullable.
2065+
// TODO: We should change the project column to be non-nullable, but it's too closed down for that.
2066+
rewrittenSelectExpression = selectExpression;
2067+
foundNull = false;
2068+
return true;
2069+
}
2070+
2071+
foundNull = true;
2072+
2073+
// TODO: We currently only have read-only access to the parameter values in the nullability processor (and in all of the
2074+
// 2nd-level query pipeline); to need to flow the mutable dictionary in. Note that any modification of parameter values (as
2075+
// here) must immediately entail DoNotCache().
2076+
Check.DebugAssert(ParameterValues is Dictionary<string, object?>, "ParameterValues isn't a Dictionary");
2077+
if (ParameterValues is not Dictionary<string, object?> mutableParameterValues)
2078+
{
2079+
rewrittenSelectExpression = null;
2080+
foundNull = null;
2081+
return false;
2082+
}
2083+
2084+
var rewrittenParameter = new SqlParameterExpression(
2085+
collectionParameter.Name + "_without_nulls", collectionParameter.Type, collectionParameter.TypeMapping);
2086+
mutableParameterValues[rewrittenParameter.Name] = processedValues;
2087+
var rewrittenCollectionTable = UpdateParameterCollection(collectionTable, rewrittenParameter);
2088+
2089+
// We clone the select expression since Update below doesn't create a pure copy, mutating the original as well (because of
2090+
// TableReferenceExpression). TODO: Remove this after #31327.
2091+
#pragma warning disable EF1001
2092+
rewrittenSelectExpression = selectExpression.Clone();
2093+
#pragma warning restore EF1001
2094+
2095+
rewrittenSelectExpression = rewrittenSelectExpression.Update(
2096+
projection, // TODO: We should change the project column to be non-nullable, but it's too closed down for that.
2097+
new[] { rewrittenCollectionTable },
2098+
selectExpression.Predicate,
2099+
selectExpression.GroupBy,
2100+
selectExpression.Having,
2101+
selectExpression.Orderings,
2102+
selectExpression.Limit,
2103+
selectExpression.Offset);
2104+
2105+
return true;
2106+
}
2107+
2108+
rewrittenSelectExpression = null;
2109+
foundNull = null;
2110+
return false;
2111+
}
2112+
2113+
/// <summary>
2114+
/// A provider hook for identifying a <see cref="TableExpressionBase" /> which represents a collection, e.g. OPENJSON on SQL Server.
2115+
/// </summary>
2116+
[EntityFrameworkInternal]
2117+
protected virtual bool IsCollectionTable(TableExpressionBase table, [NotNullWhen(true)] out Expression? collection)
2118+
{
2119+
collection = null;
2120+
return false;
2121+
}
2122+
2123+
/// <summary>
2124+
/// Given a <see cref="TableExpressionBase" /> which was previously identified to be a parameterized collection table (e.g.
2125+
/// OPENJSON on SQL Server, see <see cref="IsCollectionTable" />), replaces the parameter for that table.
2126+
/// </summary>
2127+
[EntityFrameworkInternal]
2128+
protected virtual TableExpressionBase UpdateParameterCollection(
2129+
TableExpressionBase table,
2130+
SqlParameterExpression newCollectionParameter)
2131+
=> throw new InvalidOperationException();
2132+
19632133
private SqlExpression ProcessNullNotNull(SqlUnaryExpression sqlUnaryExpression, bool operandNullable)
19642134
{
19652135
if (!operandNullable)

src/EFCore.SqlServer/Query/Internal/SqlServerSqlNullabilityProcessor.cs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
using System.Collections;
5+
using System.Diagnostics.CodeAnalysis;
46
using Microsoft.EntityFrameworkCore.Query.SqlExpressions;
57

68
namespace Microsoft.EntityFrameworkCore.SqlServer.Query.Internal;
@@ -113,4 +115,36 @@ protected virtual SqlExpression VisitSqlServerAggregateFunction(
113115
/// </summary>
114116
protected override bool PreferExistsToInWithCoalesce
115117
=> true;
118+
119+
#pragma warning disable EF1001
120+
/// <summary>
121+
/// This is an internal API that supports the Entity Framework Core infrastructure and not subject to
122+
/// the same compatibility standards as public APIs. It may be changed or removed without notice in
123+
/// any release. You should only use it directly in your code with extreme caution and knowing that
124+
/// doing so can result in application failures when updating to a new Entity Framework Core release.
125+
/// </summary>
126+
protected override bool IsCollectionTable(TableExpressionBase table, [NotNullWhen(true)] out Expression? collection)
127+
{
128+
if (table is SqlServerOpenJsonExpression { Arguments: [var argument] })
129+
{
130+
collection = argument;
131+
return true;
132+
}
133+
134+
return base.IsCollectionTable(table, out collection);
135+
}
136+
137+
/// <summary>
138+
/// This is an internal API that supports the Entity Framework Core infrastructure and not subject to
139+
/// the same compatibility standards as public APIs. It may be changed or removed without notice in
140+
/// any release. You should only use it directly in your code with extreme caution and knowing that
141+
/// doing so can result in application failures when updating to a new Entity Framework Core release.
142+
/// </summary>
143+
protected override TableExpressionBase UpdateParameterCollection(
144+
TableExpressionBase table,
145+
SqlParameterExpression newCollectionParameter)
146+
=> table is SqlServerOpenJsonExpression { Arguments: [SqlParameterExpression] } openJsonExpression
147+
? openJsonExpression.Update(newCollectionParameter, path: null)
148+
: base.UpdateParameterCollection(table, newCollectionParameter);
149+
#pragma warning restore EF1001
116150
}

src/EFCore.Sqlite.Core/Query/Internal/SqliteSqlNullabilityProcessor.cs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
using System.Diagnostics.CodeAnalysis;
45
using Microsoft.EntityFrameworkCore.Query.SqlExpressions;
56
using Microsoft.EntityFrameworkCore.Sqlite.Query.SqlExpressions.Internal;
67

@@ -83,4 +84,36 @@ protected virtual SqlExpression VisitRegexp(
8384

8485
return regexpExpression.Update(match, pattern);
8586
}
87+
88+
#pragma warning disable EF1001
89+
/// <summary>
90+
/// This is an internal API that supports the Entity Framework Core infrastructure and not subject to
91+
/// the same compatibility standards as public APIs. It may be changed or removed without notice in
92+
/// any release. You should only use it directly in your code with extreme caution and knowing that
93+
/// doing so can result in application failures when updating to a new Entity Framework Core release.
94+
/// </summary>
95+
protected override bool IsCollectionTable(TableExpressionBase table, [NotNullWhen(true)] out Expression? collection)
96+
{
97+
if (table is TableValuedFunctionExpression { Name: "json_each", Schema: null, IsBuiltIn: true, Arguments: [var argument] })
98+
{
99+
collection = argument;
100+
return true;
101+
}
102+
103+
return base.IsCollectionTable(table, out collection);
104+
}
105+
106+
/// <summary>
107+
/// This is an internal API that supports the Entity Framework Core infrastructure and not subject to
108+
/// the same compatibility standards as public APIs. It may be changed or removed without notice in
109+
/// any release. You should only use it directly in your code with extreme caution and knowing that
110+
/// doing so can result in application failures when updating to a new Entity Framework Core release.
111+
/// </summary>
112+
protected override TableExpressionBase UpdateParameterCollection(
113+
TableExpressionBase table,
114+
SqlParameterExpression newCollectionParameter)
115+
=> table is TableValuedFunctionExpression { Arguments: [SqlParameterExpression] } jsonEachExpression
116+
? jsonEachExpression.Update(new[] { newCollectionParameter })
117+
: base.UpdateParameterCollection(table, newCollectionParameter);
118+
#pragma warning restore EF1001
86119
}

test/EFCore.Relational.Specification.Tests/Query/NullSemanticsQueryTestBase.cs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,8 @@ await AssertQueryScalar(
12441244

12451245
#endregion Contains with subquery
12461246

1247+
// For more tests on Contains with parameterized collections, see PrimitiveCollectionsqueryTestBase
1248+
12471249
#region Contains with inline collection
12481250

12491251
[ConditionalTheory]
@@ -1260,7 +1262,7 @@ await AssertQueryScalar(
12601262

12611263
[ConditionalTheory]
12621264
[MemberData(nameof(IsAsyncData))]
1263-
public virtual async Task Null_semantics_contains_with_non_nullable_item_and_inline_non_nullable_values_with_null(bool async)
1265+
public virtual async Task Null_semantics_contains_with_non_nullable_item_and_inline_values_with_null(bool async)
12641266
{
12651267
await AssertQueryScalar(
12661268
async, ss => ss.Set<NullSemanticsEntity1>()
@@ -1272,7 +1274,7 @@ await AssertQueryScalar(
12721274

12731275
[ConditionalTheory]
12741276
[MemberData(nameof(IsAsyncData))]
1275-
public virtual async Task Null_semantics_contains_with_non_nullable_item_and_inline_nullable_values(bool async)
1277+
public virtual async Task Null_semantics_contains_with_non_nullable_item_and_inline_values_with_nullable_column(bool async)
12761278
{
12771279
await AssertQueryScalar(
12781280
async, ss => ss.Set<NullSemanticsEntity1>()
@@ -1284,7 +1286,7 @@ await AssertQueryScalar(
12841286

12851287
[ConditionalTheory]
12861288
[MemberData(nameof(IsAsyncData))]
1287-
public virtual async Task Null_semantics_contains_with_non_nullable_item_and_inline_nullable_values_with_null(bool async)
1289+
public virtual async Task Null_semantics_contains_with_non_nullable_item_and_inline_values_with_nullable_column_and_null(bool async)
12881290
{
12891291
await AssertQueryScalar(
12901292
async, ss => ss.Set<NullSemanticsEntity1>()
@@ -1308,7 +1310,7 @@ await AssertQueryScalar(
13081310

13091311
[ConditionalTheory]
13101312
[MemberData(nameof(IsAsyncData))]
1311-
public virtual async Task Null_semantics_contains_with_nullable_item_and_inline_non_nullable_values_with_null(bool async)
1313+
public virtual async Task Null_semantics_contains_with_nullable_item_and_inline_values_with_null(bool async)
13121314
{
13131315
await AssertQueryScalar(
13141316
async, ss => ss.Set<NullSemanticsEntity1>()
@@ -1320,7 +1322,7 @@ await AssertQueryScalar(
13201322

13211323
[ConditionalTheory]
13221324
[MemberData(nameof(IsAsyncData))]
1323-
public virtual async Task Null_semantics_contains_with_nullable_item_and_inline_nullable_values(bool async)
1325+
public virtual async Task Null_semantics_contains_with_nullable_item_and_inline_values_with_nullable_column(bool async)
13241326
{
13251327
await AssertQueryScalar(
13261328
async, ss => ss.Set<NullSemanticsEntity1>()
@@ -1332,7 +1334,7 @@ await AssertQueryScalar(
13321334

13331335
[ConditionalTheory]
13341336
[MemberData(nameof(IsAsyncData))]
1335-
public virtual async Task Null_semantics_contains_with_nullable_item_and_inline_nullable_values_with_null(bool async)
1337+
public virtual async Task Null_semantics_contains_with_nullable_item_and_values_with_nullable_column_and_null(bool async)
13361338
{
13371339
await AssertQueryScalar(
13381340
async, ss => ss.Set<NullSemanticsEntity1>()

0 commit comments

Comments
 (0)