Skip to content

Commit b4c746b

Browse files
authored
Add regex "unit tests" test project (#65944)
* Add regex "unit tests" test project This follows the convention used by the networking tests, which typically have two distinct test projects per library: functional tests and unit tests. The former are what we typically refer to as our tests for a library, whereas the latter build product source into the test project in order to directly validate internals (an alternative to this is to use InternalsVisibleTo, but that negatively impacts trimming and makes it more challenging to maintain a property boundary for the functional tests). All the existing tests are moved unedited into the FunctionalTests, and a new UnitTests project is added with some initial tests for the RegexTreeAnalyzer code. The generator parser tests were also consolidated into the functional tests, as there's no longer a good reason for those few tests to be separate. I fixed a few bugs in RegexTreeAnalyzer as a result. In particular, we were over-annotating things as potentially containing captures or backtracking because in the implementation we were using the lookup APIs meant to be used only once all analysis was complete. This doesn't have a negative functional impact, but it does negatively impact perf of compiled / source generator, which then generate unnecessary code. We were also incorrectly conflating atomicity conferred by a grandparent with atomicity conferred by a parent; we need MayBacktracks to reflect only the atomicity directly contributed by a node, not by its parent's influence, as we need the parent to be able to understand whether the child might backtrack. * Add some unit tests for RegexFindOptimizations * Address PR feedback
1 parent 3d6781b commit b4c746b

File tree

53 files changed

+433
-205
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+433
-205
lines changed

src/libraries/System.Text.RegularExpressions/System.Text.RegularExpressions.sln

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressi
1111
EndProject
1212
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions", "src\System.Text.RegularExpressions.csproj", "{0409C086-D7CC-43F8-9762-C94FB1E47F5B}"
1313
EndProject
14-
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Generators.Tests", "tests\System.Text.RegularExpressions.Generators.Tests\System.Text.RegularExpressions.Generators.Tests.csproj", "{32ABFCDA-10FD-4A98-A429-145C28021EBE}"
14+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Tests", "tests\FunctionalTests\System.Text.RegularExpressions.Tests.csproj", "{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}"
1515
EndProject
16-
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Tests", "tests\System.Text.RegularExpressions.Tests.csproj", "{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}"
16+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.UnitTests", "tests\UnitTests\System.Text.RegularExpressions.UnitTests.csproj", "{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}"
1717
EndProject
1818
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{2ACCCAAB-F0CE-4839-82BD-F174861DEA78}"
1919
EndProject
@@ -53,22 +53,22 @@ Global
5353
{0409C086-D7CC-43F8-9762-C94FB1E47F5B}.Debug|Any CPU.Build.0 = Debug|Any CPU
5454
{0409C086-D7CC-43F8-9762-C94FB1E47F5B}.Release|Any CPU.ActiveCfg = Release|Any CPU
5555
{0409C086-D7CC-43F8-9762-C94FB1E47F5B}.Release|Any CPU.Build.0 = Release|Any CPU
56-
{32ABFCDA-10FD-4A98-A429-145C28021EBE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
57-
{32ABFCDA-10FD-4A98-A429-145C28021EBE}.Debug|Any CPU.Build.0 = Debug|Any CPU
58-
{32ABFCDA-10FD-4A98-A429-145C28021EBE}.Release|Any CPU.ActiveCfg = Release|Any CPU
59-
{32ABFCDA-10FD-4A98-A429-145C28021EBE}.Release|Any CPU.Build.0 = Release|Any CPU
6056
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
6157
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Debug|Any CPU.Build.0 = Debug|Any CPU
6258
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Release|Any CPU.ActiveCfg = Release|Any CPU
6359
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Release|Any CPU.Build.0 = Release|Any CPU
60+
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
61+
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}.Debug|Any CPU.Build.0 = Debug|Any CPU
62+
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}.Release|Any CPU.ActiveCfg = Release|Any CPU
63+
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}.Release|Any CPU.Build.0 = Release|Any CPU
6464
EndGlobalSection
6565
GlobalSection(SolutionProperties) = preSolution
6666
HideSolutionNode = FALSE
6767
EndGlobalSection
6868
GlobalSection(NestedProjects) = preSolution
6969
{63551298-BFD4-43FC-8465-AC454228B83C} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78}
70-
{32ABFCDA-10FD-4A98-A429-145C28021EBE} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78}
7170
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78}
71+
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78}
7272
{08F0E5F4-BBD5-45CC-BB12-BA37A83AD7B6} = {0D20E771-24BD-4F9E-BBD0-60156E8C44FC}
7373
{77CDA838-6489-4816-8847-DE2C7F5E1DCE} = {0D20E771-24BD-4F9E-BBD0-60156E8C44FC}
7474
{3699C8E2-C354-4AED-81DC-ECBAC3EFEB4B} = {0D20E771-24BD-4F9E-BBD0-60156E8C44FC}

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -127,25 +127,17 @@ public RegexFindOptimizations(RegexTree tree, CultureInfo culture)
127127
// The set contains one and only one character, meaning every match starts
128128
// with the same literal value (potentially case-insensitive). Search for that.
129129
FixedDistanceLiteral = (chars[0], 0);
130-
FindMode = (_rightToLeft, set.CaseInsensitive) switch
131-
{
132-
(false, false) => FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseSensitive,
133-
(false, true) => FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive,
134-
(true, false) => FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseSensitive,
135-
(true, true) => FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive,
136-
};
130+
FindMode = set.CaseInsensitive ?
131+
FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive :
132+
FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseSensitive;
137133
}
138134
else
139135
{
140136
// The set may match multiple characters. Search for that.
141137
FixedDistanceSets = new() { (chars, set.CharClass, 0, set.CaseInsensitive) };
142-
FindMode = (_rightToLeft, set.CaseInsensitive) switch
143-
{
144-
(false, false) => FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive,
145-
(false, true) => FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive,
146-
(true, false) => FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive,
147-
(true, true) => FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive,
148-
};
138+
FindMode = set.CaseInsensitive ?
139+
FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive :
140+
FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive;
149141
_asciiLookups = new uint[1][];
150142
}
151143
}

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTreeAnalyzer.cs

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@ internal static class RegexTreeAnalyzer
1313
public static AnalysisResults Analyze(RegexCode code)
1414
{
1515
var results = new AnalysisResults(code);
16-
results._complete = TryAnalyze(code.Tree.Root, results, isAtomicBySelfOrParent: true);
16+
results._complete = TryAnalyze(code.Tree.Root, results, isAtomicByAncestor: true);
1717
return results;
1818

19-
static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicBySelfOrParent)
19+
static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByAncestor)
2020
{
2121
if (!StackHelper.TryEnsureSufficientExecutionStack())
2222
{
2323
return false;
2424
}
2525

26-
if (isAtomicBySelfOrParent)
26+
if (isAtomicByAncestor)
2727
{
2828
// We've been told by our parent that we should be considered atomic, so add ourselves
2929
// to the atomic collection.
@@ -45,14 +45,15 @@ static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByS
4545
}
4646

4747
// Update state for certain node types.
48+
bool isAtomicBySelf = false;
4849
switch (node.Kind)
4950
{
5051
// Some node types add atomicity around what they wrap. Set isAtomicBySelfOrParent to true for such nodes
5152
// even if it was false upon entering the method.
5253
case RegexNodeKind.Atomic:
5354
case RegexNodeKind.NegativeLookaround:
5455
case RegexNodeKind.PositiveLookaround:
55-
isAtomicBySelfOrParent = true;
56+
isAtomicBySelf = true;
5657
break;
5758

5859
// Track any nodes that are themselves captures.
@@ -70,7 +71,7 @@ static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByS
7071
// Determine whether the child should be treated as atomic (whether anything
7172
// can backtrack into it), which is influenced by whether this node (the child's
7273
// parent) is considered atomic by itself or by its parent.
73-
bool treatChildAsAtomic = isAtomicBySelfOrParent && node.Kind switch
74+
bool treatChildAsAtomic = (isAtomicByAncestor | isAtomicBySelf) && node.Kind switch
7475
{
7576
// If the parent is atomic, so is the child. That's the whole purpose
7677
// of the Atomic node, and lookarounds are also implicitly atomic.
@@ -104,14 +105,16 @@ static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByS
104105
}
105106

106107
// If the child contains captures, so too does this parent.
107-
if (results.MayContainCapture(child))
108+
if (results._containsCapture.Contains(child))
108109
{
109110
results._containsCapture.Add(node);
110111
}
111112

112113
// If the child might require backtracking into it, so too might the parent,
113-
// unless the parent is itself considered atomic.
114-
if (!isAtomicBySelfOrParent && results.MayBacktrack(child))
114+
// unless the parent is itself considered atomic. Here we don't consider parental
115+
// atomicity, as we need to surface upwards to the parent whether any backtracking
116+
// will be visible from this node to it.
117+
if (!isAtomicBySelf && (results._mayBacktrack?.Contains(child) == true))
115118
{
116119
(results._mayBacktrack ??= new()).Add(node);
117120
}
@@ -149,7 +152,7 @@ internal sealed class AnalysisResults
149152
/// <summary>Gets the code that was analyzed.</summary>
150153
public RegexCode Code { get; }
151154

152-
/// <summary>Gets whether a node is considered atomic based on itself or its ancestry.</summary>
155+
/// <summary>Gets whether a node is considered atomic based on its ancestry.</summary>
153156
public bool IsAtomicByAncestor(RegexNode node) => _isAtomicByAncestor.Contains(node);
154157

155158
/// <summary>Gets whether a node directly or indirectly contains captures.</summary>

src/libraries/System.Text.RegularExpressions/tests/AttRegexTests.cs renamed to src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/AttRegexTests.cs

File renamed without changes.

src/libraries/System.Text.RegularExpressions/tests/CaptureCollectionTests.cs renamed to src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/CaptureCollectionTests.cs

File renamed without changes.

src/libraries/System.Text.RegularExpressions/tests/CaptureCollectionTests2.cs renamed to src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/CaptureCollectionTests2.cs

File renamed without changes.

src/libraries/System.Text.RegularExpressions/tests/CustomDerivedRegexScenarioTest.cs renamed to src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/CustomDerivedRegexScenarioTest.cs

File renamed without changes.

src/libraries/System.Text.RegularExpressions/tests/GroupCollectionReadOnlyDictionaryTests.cs renamed to src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/GroupCollectionReadOnlyDictionaryTests.cs

File renamed without changes.

src/libraries/System.Text.RegularExpressions/tests/GroupCollectionTests.cs renamed to src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/GroupCollectionTests.cs

File renamed without changes.

src/libraries/System.Text.RegularExpressions/tests/GroupCollectionTests2.cs renamed to src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/GroupCollectionTests2.cs

File renamed without changes.

0 commit comments

Comments
 (0)