@@ -390,10 +390,10 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
390390 matchStart = matchEnd < startat ?
391391 startat : ( _pattern . _info . ContainsLineAnchor , _pattern . _info . ContainsSomeAnchor ) switch
392392 {
393- ( true , true ) => FindStartPosition < FullInputReader , FullNullabilityHandler > ( input , matchEnd , matchStartLowBoundary , perThreadData ) ,
394- ( true , false ) => FindStartPosition < FullInputReader , NoAnchorsNullabilityHandler > ( input , matchEnd , matchStartLowBoundary , perThreadData ) ,
395- ( false , true ) => FindStartPosition < NoZAnchorInputReader , FullNullabilityHandler > ( input , matchEnd , matchStartLowBoundary , perThreadData ) ,
396- ( false , false ) => FindStartPosition < NoZAnchorInputReader , NoAnchorsNullabilityHandler > ( input , matchEnd , matchStartLowBoundary , perThreadData ) ,
393+ ( true , true ) => FindStartPosition < FullInputReader , FullNullabilityHandler > ( input , matchEnd , timeoutOccursAt , matchStartLowBoundary , perThreadData ) ,
394+ ( true , false ) => FindStartPosition < FullInputReader , NoAnchorsNullabilityHandler > ( input , matchEnd , timeoutOccursAt , matchStartLowBoundary , perThreadData ) ,
395+ ( false , true ) => FindStartPosition < NoZAnchorInputReader , FullNullabilityHandler > ( input , matchEnd , timeoutOccursAt , matchStartLowBoundary , perThreadData ) ,
396+ ( false , false ) => FindStartPosition < NoZAnchorInputReader , NoAnchorsNullabilityHandler > ( input , matchEnd , timeoutOccursAt , matchStartLowBoundary , perThreadData ) ,
397397 } ;
398398 }
399399
@@ -409,8 +409,8 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
409409 else
410410 {
411411 Registers endRegisters = _pattern . _info . ContainsLineAnchor ?
412- FindSubcaptures < FullInputReader > ( input , matchStart , matchEnd , perThreadData ) :
413- FindSubcaptures < NoZAnchorInputReader > ( input , matchStart , matchEnd , perThreadData ) ;
412+ FindSubcaptures < FullInputReader > ( input , matchStart , timeoutOccursAt , matchEnd , perThreadData ) :
413+ FindSubcaptures < NoZAnchorInputReader > ( input , matchStart , timeoutOccursAt , matchEnd , perThreadData ) ;
414414 return new SymbolicMatch ( matchStart , matchEnd - matchStart , endRegisters . CaptureStarts , endRegisters . CaptureEnds ) ;
415415 }
416416 }
@@ -442,14 +442,9 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
442442 while ( true )
443443 {
444444 // Now run the DFA or NFA traversal from the current point using the current state. If timeouts are being checked,
445- // we need to pop out of the inner loop every now and then to do the timeout check in this outer loop. Note that
446- // the timeout exists not to provide perfect guarantees around execution time but rather as a mitigation against
447- // catastrophic backtracking. Catastrophic backtracking is not an issue for the NonBacktracking engine, but we
448- // still check the timeout now and again to provide some semblance of the behavior a developer experiences with
449- // the backtracking engines. We can, however, choose a large number here, since it's not actually needed for security.
450- const int CharsPerTimeoutCheck = 1_000 ;
451- int innerLoopLength = _checkTimeout && input . Length - pos > CharsPerTimeoutCheck ?
452- pos + CharsPerTimeoutCheck :
445+ // we need to pop out of the inner loop every now and then to do the timeout check in this outer loop.
446+ int innerLoopLength = _checkTimeout && input . Length - pos > SymbolicRegexThresholds . CharsPerTimeoutCheckStartEnd ?
447+ pos + SymbolicRegexThresholds . CharsPerTimeoutCheckStartEnd :
453448 input . Length ;
454449
455450 bool done = currentState . NfaState is not null ?
@@ -591,10 +586,11 @@ private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizatio
591586 /// </remarks>
592587 /// <param name="input">The input text.</param>
593588 /// <param name="i">The ending position to walk backwards from. <paramref name="i"/> points one past the last character of the match.</param>
589+ /// <param name="timeoutOccursAt">The time at which timeout occurs, if timeouts are being checked.</param>
594590 /// <param name="matchStartBoundary">The initial starting location discovered in phase 1, a point we must not walk earlier than.</param>
595591 /// <param name="perThreadData">Per thread data reused between calls.</param>
596592 /// <returns>The found starting position for the match.</returns>
597- private int FindStartPosition < TInputReader , TNullabilityHandler > ( ReadOnlySpan < char > input , int i , int matchStartBoundary , PerThreadData perThreadData )
593+ private int FindStartPosition < TInputReader , TNullabilityHandler > ( ReadOnlySpan < char > input , int i , long timeoutOccursAt , int matchStartBoundary , PerThreadData perThreadData )
598594 where TInputReader : struct , IInputReader
599595 where TNullabilityHandler : struct , INullabilityHandler
600596 {
@@ -611,24 +607,40 @@ private int FindStartPosition<TInputReader, TNullabilityHandler>(ReadOnlySpan<ch
611607 // Walk backwards to the furthest accepting state of the reverse pattern but no earlier than matchStartBoundary.
612608 while ( true )
613609 {
614- // Run the DFA or NFA traversal backwards from the current point using the current state.
610+ // Run the DFA or NFA traversal backwards from the current point using the current state. If timeouts are being checked,
611+ // we need to pop out of the inner loop every now and then to do the timeout check in this outer loop.
612+ int innerLoopStartBoundary = _checkTimeout && i - matchStartBoundary > SymbolicRegexThresholds . CharsPerTimeoutCheckStartEnd ?
613+ i - SymbolicRegexThresholds . CharsPerTimeoutCheckStartEnd :
614+ matchStartBoundary ;
615+
615616 bool done = currentState . NfaState is not null ?
616- FindStartPositionDeltas < NfaStateHandler , TInputReader , TNullabilityHandler > ( input , ref i , matchStartBoundary , ref currentState , ref lastStart ) :
617- FindStartPositionDeltas < DfaStateHandler , TInputReader , TNullabilityHandler > ( input , ref i , matchStartBoundary , ref currentState , ref lastStart ) ;
617+ FindStartPositionDeltas < NfaStateHandler , TInputReader , TNullabilityHandler > ( input , ref i , innerLoopStartBoundary , ref currentState , ref lastStart ) :
618+ FindStartPositionDeltas < DfaStateHandler , TInputReader , TNullabilityHandler > ( input , ref i , innerLoopStartBoundary , ref currentState , ref lastStart ) ;
618619
619- // If we found the starting position, we're done.
620- if ( done )
620+ // If the inner loop indicates that the search finished (for example due to reaching a deadend state) or
621+ // there is no more input available, then the whole search is done.
622+ if ( done || i <= matchStartBoundary )
621623 {
622624 break ;
623625 }
624626
625- // We didn't find the starting position but we did exit out of the backwards traversal. That should only happen
626- // if we were unable to transition, which should only happen if we were in DFA mode and exceeded our graph size.
627- // Upgrade to NFA mode and continue.
628- Debug . Assert ( i >= matchStartBoundary ) ;
629- NfaMatchingState nfaState = perThreadData . NfaState ;
630- nfaState . InitializeFrom ( this , GetState ( currentState . DfaStateId ) ) ;
631- currentState = new CurrentState ( nfaState ) ;
627+ // The search did not finish, so we either failed to transition (which should only happen if we were in DFA mode and
628+ // need to switch over to NFA mode) or ran out of input in the inner loop. Check if the inner loop still had more
629+ // input available.
630+ if ( i > innerLoopStartBoundary )
631+ {
632+ // Because there was still more input available, a failure to transition in DFA mode must be the cause
633+ // of the early exit. Upgrade to NFA mode.
634+ NfaMatchingState nfaState = perThreadData . NfaState ;
635+ nfaState . InitializeFrom ( this , GetState ( currentState . DfaStateId ) ) ;
636+ currentState = new CurrentState ( nfaState ) ;
637+ }
638+
639+ // Check for a timeout before continuing.
640+ if ( _checkTimeout )
641+ {
642+ CheckTimeout ( timeoutOccursAt ) ;
643+ }
632644 }
633645
634646 Debug . Assert ( lastStart != - 1 , "We expected to find a starting position but didn't." ) ;
@@ -663,18 +675,17 @@ private bool FindStartPositionDeltas<TStateHandler, TInputReader, TNullabilityHa
663675 lastStart = pos ;
664676 }
665677
666- // If we are past the start threshold or if the state is a dead end, bail; we should have already
667- // found a valid starting location.
668- if ( pos <= startThreshold || flags . IsDeadend ( ) )
678+ // If the state is a dead end, bail; we should have already found a valid starting location.
679+ if ( flags . IsDeadend ( ) )
669680 {
670681 Debug . Assert ( lastStart != - 1 ) ;
671682 return true ;
672683 }
673684
674- // Try to transition with the next character, the one before the current position.
675- if ( ! TStateHandler . TryTakeTransition ( this , ref state , positionId ) )
685+ // If we are not past the start threshold, try to transition with the next character, the one before the current position.
686+ if ( pos <= startThreshold || ! TStateHandler . TryTakeTransition ( this , ref state , positionId ) )
676687 {
677- // Return false to indicate the search didn't finish.
688+ // Return false to indicate the search possibly didn't finish.
678689 return false ;
679690 }
680691
@@ -693,10 +704,11 @@ private bool FindStartPositionDeltas<TStateHandler, TInputReader, TNullabilityHa
693704 /// <summary>Run the pattern on a match to record the capture starts and ends.</summary>
694705 /// <param name="input">input span</param>
695706 /// <param name="i">inclusive start position</param>
707+ /// <param name="timeoutOccursAt">The time at which timeout occurs, if timeouts are being checked.</param>
696708 /// <param name="iEnd">exclusive end position</param>
697709 /// <param name="perThreadData">Per thread data reused between calls.</param>
698710 /// <returns>the final register values, which indicate capture starts and ends</returns>
699- private Registers FindSubcaptures < TInputReader > ( ReadOnlySpan < char > input , int i , int iEnd , PerThreadData perThreadData )
711+ private Registers FindSubcaptures < TInputReader > ( ReadOnlySpan < char > input , int i , long timeoutOccursAt , int iEnd , PerThreadData perThreadData )
700712 where TInputReader : struct , IInputReader
701713 {
702714 // Pick the correct start state based on previous character kind.
@@ -722,54 +734,68 @@ private Registers FindSubcaptures<TInputReader>(ReadOnlySpan<char> input, int i,
722734
723735 while ( ( uint ) i < ( uint ) iEnd )
724736 {
725- Debug . Assert ( next . Count == 0 ) ;
737+ // If timeouts are being checked, we need to pop out of the inner loop every now and then to do the timeout check in this outer loop.
738+ int innerLoopEnd = _checkTimeout && iEnd - i > SymbolicRegexThresholds . CharsPerTimeoutCheckSubcaptures ?
739+ i + SymbolicRegexThresholds . CharsPerTimeoutCheckSubcaptures :
740+ iEnd ;
726741
727- // i is guaranteed to be within bounds, so the position ID is a minterm ID
728- int mintermId = TInputReader . GetPositionId ( this , input , i ) ;
729-
730- foreach ( ( int sourceId , Registers sourceRegisters ) in current . Values )
742+ while ( ( uint ) i < ( uint ) innerLoopEnd )
731743 {
732- // Get or create the transitions
733- int offset = DeltaOffset ( sourceId , mintermId ) ;
734- ( int , DerivativeEffect [ ] ) [ ] transitions = _capturingNfaDelta [ offset ] ??
735- CreateNewCapturingTransition ( sourceId , mintermId , offset ) ;
744+ Debug . Assert ( next . Count == 0 ) ;
745+
746+ // i is guaranteed to be within bounds, so the position ID is a minterm ID
747+ int mintermId = TInputReader . GetPositionId ( this , input , i ) ;
736748
737- // Take the transitions in their prioritized order
738- for ( int j = 0 ; j < transitions . Length ; ++ j )
749+ foreach ( ( int sourceId , Registers sourceRegisters ) in current . Values )
739750 {
740- ( int targetStateId , DerivativeEffect [ ] effects ) = transitions [ j ] ;
751+ // Get or create the transitions
752+ int offset = DeltaOffset ( sourceId , mintermId ) ;
753+ ( int , DerivativeEffect [ ] ) [ ] transitions = _capturingNfaDelta [ offset ] ??
754+ CreateNewCapturingTransition ( sourceId , mintermId , offset ) ;
741755
742- // Try to add the state and handle the case where it didn't exist before. If the state already
743- // exists, then the transition can be safely ignored, as the existing state was generated by a
744- // higher priority transition.
745- if ( next . Add ( targetStateId , out int index ) )
756+ // Take the transitions in their prioritized order
757+ for ( int j = 0 ; j < transitions . Length ; ++ j )
746758 {
747- // Avoid copying the registers on the last transition from this state, reusing the registers instead
748- Registers newRegisters = j != transitions . Length - 1 ? sourceRegisters . Clone ( ) : sourceRegisters ;
749- newRegisters . ApplyEffects ( effects , i ) ;
750- next . Update ( index , targetStateId , newRegisters ) ;
759+ ( int targetStateId , DerivativeEffect [ ] effects ) = transitions [ j ] ;
751760
752- int coreStateId = GetCoreStateId ( targetStateId ) ;
753- StateFlags flags = _stateFlagsArray [ coreStateId ] ;
754- Debug . Assert ( ! flags . IsDeadend ( ) ) ;
755-
756- if ( flags . IsNullable ( ) || ( flags . CanBeNullable ( ) && GetState ( coreStateId ) . IsNullableFor ( GetCharKind < TInputReader > ( input , i + 1 ) ) ) )
761+ // Try to add the state and handle the case where it didn't exist before. If the state already
762+ // exists, then the transition can be safely ignored, as the existing state was generated by a
763+ // higher priority transition.
764+ if ( next . Add ( targetStateId , out int index ) )
757765 {
758- // No lower priority transitions from this or other source states are taken because the
759- // backtracking engines would return the match ending here.
760- goto BreakNullable ;
766+ // Avoid copying the registers on the last transition from this state, reusing the registers instead
767+ Registers newRegisters = j != transitions . Length - 1 ? sourceRegisters . Clone ( ) : sourceRegisters ;
768+ newRegisters . ApplyEffects ( effects , i ) ;
769+ next . Update ( index , targetStateId , newRegisters ) ;
770+
771+ int coreStateId = GetCoreStateId ( targetStateId ) ;
772+ StateFlags flags = _stateFlagsArray [ coreStateId ] ;
773+ Debug . Assert ( ! flags . IsDeadend ( ) ) ;
774+
775+ if ( flags . IsNullable ( ) || ( flags . CanBeNullable ( ) && GetState ( coreStateId ) . IsNullableFor ( GetCharKind < TInputReader > ( input , i + 1 ) ) ) )
776+ {
777+ // No lower priority transitions from this or other source states are taken because the
778+ // backtracking engines would return the match ending here.
779+ goto BreakNullable ;
780+ }
761781 }
762782 }
763783 }
784+
785+ BreakNullable :
786+ // Swap the state sets and prepare for the next character
787+ SparseIntMap < Registers > tmp = current ;
788+ current = next ;
789+ next = tmp ;
790+ next . Clear ( ) ;
791+ i ++ ;
764792 }
765793
766- BreakNullable :
767- // Swap the state sets and prepare for the next character
768- SparseIntMap < Registers > tmp = current ;
769- current = next ;
770- next = tmp ;
771- next . Clear ( ) ;
772- i ++ ;
794+ // Check for a timeout before continuing.
795+ if ( _checkTimeout )
796+ {
797+ CheckTimeout ( timeoutOccursAt ) ;
798+ }
773799 }
774800
775801 Debug . Assert ( current . Count > 0 ) ;
0 commit comments