Skip to content

Commit 8ced5ae

Browse files
[Release 1.1] Fix | Rework timeout timer to ensure guaranteed execution (#950)
1 parent 39873ee commit 8ced5ae

12 files changed

Lines changed: 519 additions & 58 deletions

File tree

src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/SqlBulkCopy.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2686,7 +2686,7 @@ private void CleanUpStateObject(bool isCancelRequested = true)
26862686
{
26872687
_stateObj.CancelRequest();
26882688
}
2689-
_stateObj._internalTimeout = false;
2689+
_stateObj.SetTimeoutStateStopped();
26902690
_stateObj.CloseSession();
26912691
_stateObj._bulkCopyOpperationInProgress = false;
26922692
_stateObj._bulkCopyWriteTimeout = false;

src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/SqlDataReader.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -952,7 +952,7 @@ private bool TryCloseInternal(bool closeReader)
952952
{
953953
_sharedState._dataReady = true; // set _sharedState._dataReady to not confuse CleanPartialRead
954954
}
955-
_stateObj._internalTimeout = false;
955+
_stateObj.SetTimeoutStateStopped();
956956
if (_sharedState._dataReady)
957957
{
958958
cleanDataFailed = true;

src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParser.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,7 +1743,7 @@ internal bool TryRun(RunBehavior runBehavior, SqlCommand cmdHandler, SqlDataRead
17431743
// If there is data ready, but we didn't exit the loop, then something is wrong
17441744
Debug.Assert(!dataReady, "dataReady not expected - did we forget to skip the row?");
17451745

1746-
if (stateObj._internalTimeout)
1746+
if (stateObj.IsTimeoutStateExpired)
17471747
{
17481748
runBehavior = RunBehavior.Attention;
17491749
}
@@ -2365,7 +2365,7 @@ internal bool TryRun(RunBehavior runBehavior, SqlCommand cmdHandler, SqlDataRead
23652365
stateObj._attentionSent = false;
23662366
stateObj.HasReceivedAttention = false;
23672367

2368-
if (RunBehavior.Clean != (RunBehavior.Clean & runBehavior) && !stateObj._internalTimeout)
2368+
if (RunBehavior.Clean != (RunBehavior.Clean & runBehavior) && !stateObj.IsTimeoutStateExpired)
23692369
{
23702370
// Add attention error to collection - if not RunBehavior.Clean!
23712371
stateObj.AddError(new SqlError(0, 0, TdsEnums.MIN_ERROR_CLASS, _server, SQLMessage.OperationCancelled(), "", 0));

src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParserStateObject.cs

Lines changed: 131 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,23 @@ internal enum SnapshottedStateFlags : byte
3333
AttentionReceived = 1 << 5 // NOTE: Received is not volatile as it is only ever accessed\modified by TryRun its callees (i.e. single threaded access)
3434
}
3535

36+
private sealed class TimeoutState
37+
{
38+
public const int Stopped = 0;
39+
public const int Running = 1;
40+
public const int ExpiredAsync = 2;
41+
public const int ExpiredSync = 3;
42+
43+
private readonly int _value;
44+
45+
public TimeoutState(int value)
46+
{
47+
_value = value;
48+
}
49+
50+
public int IdentityValue => _value;
51+
}
52+
3653
private const int AttentionTimeoutSeconds = 5;
3754

3855
// Ticks to consider a connection "good" after a successful I/O (10,000 ticks = 1 ms)
@@ -106,9 +123,17 @@ internal enum SnapshottedStateFlags : byte
106123
// Timeout variables
107124
private long _timeoutMilliseconds;
108125
private long _timeoutTime; // variable used for timeout computations, holds the value of the hi-res performance counter at which this request should expire
126+
private int _timeoutState; // expected to be one of the constant values TimeoutStopped, TimeoutRunning, TimeoutExpiredAsync, TimeoutExpiredSync
127+
private int _timeoutIdentitySource;
128+
private volatile int _timeoutIdentityValue;
109129
internal volatile bool _attentionSent; // true if we sent an Attention to the server
110130
internal volatile bool _attentionSending;
111-
internal bool _internalTimeout; // an internal timeout occurred
131+
132+
// Below 2 properties are used to enforce timeout delays in code to
133+
// reproduce issues related to theadpool starvation and timeout delay.
134+
// It should always be set to false by default, and only be enabled during testing.
135+
internal bool _enforceTimeoutDelay = false;
136+
internal int _enforcedTimeoutDelayInMilliSeconds = 5000;
112137

113138
private readonly LastIOTimer _lastSuccessfulIOTimer;
114139

@@ -752,7 +777,7 @@ private void ResetCancelAndProcessAttention()
752777
// operations.
753778
Parser.ProcessPendingAck(this);
754779
}
755-
_internalTimeout = false;
780+
SetTimeoutStateStopped();
756781
}
757782
}
758783

@@ -1030,7 +1055,7 @@ internal bool TryProcessHeader()
10301055
return false;
10311056
}
10321057

1033-
if (_internalTimeout)
1058+
if (IsTimeoutStateExpired)
10341059
{
10351060
ThrowExceptionAndWarning();
10361061
return true;
@@ -2261,11 +2286,62 @@ internal void OnConnectionClosed()
22612286
}
22622287
}
22632288

2264-
private void OnTimeout(object state)
2289+
public void SetTimeoutStateStopped()
2290+
{
2291+
Interlocked.Exchange(ref _timeoutState, TimeoutState.Stopped);
2292+
_timeoutIdentityValue = 0;
2293+
}
2294+
2295+
public bool IsTimeoutStateExpired
2296+
{
2297+
get
2298+
{
2299+
int state = _timeoutState;
2300+
return state == TimeoutState.ExpiredAsync || state == TimeoutState.ExpiredSync;
2301+
}
2302+
}
2303+
2304+
private void OnTimeoutAsync(object state)
2305+
{
2306+
if (_enforceTimeoutDelay)
2307+
{
2308+
Thread.Sleep(_enforcedTimeoutDelayInMilliSeconds);
2309+
}
2310+
2311+
int currentIdentityValue = _timeoutIdentityValue;
2312+
TimeoutState timeoutState = (TimeoutState)state;
2313+
if (timeoutState.IdentityValue == _timeoutIdentityValue)
2314+
{
2315+
// the return value is not useful here because no choice is going to be made using it
2316+
// we only want to make this call to set the state knowing that it will be seen later
2317+
OnTimeoutCore(TimeoutState.Running, TimeoutState.ExpiredAsync);
2318+
}
2319+
else
2320+
{
2321+
Debug.WriteLine($"OnTimeoutAsync called with identity state={timeoutState.IdentityValue} but current identity is {currentIdentityValue} so it is being ignored");
2322+
}
2323+
}
2324+
2325+
private bool OnTimeoutSync()
22652326
{
2266-
if (!_internalTimeout)
2327+
return OnTimeoutCore(TimeoutState.Running, TimeoutState.ExpiredSync);
2328+
}
2329+
2330+
/// <summary>
2331+
/// attempts to change the timout state from the expected state to the target state and if it succeeds
2332+
/// will setup the the stateobject into the timeout expired state
2333+
/// </summary>
2334+
/// <param name="expectedState">the state that is the expected current state, state will change only if this is correct</param>
2335+
/// <param name="targetState">the state that will be changed to if the expected state is correct</param>
2336+
/// <returns>boolean value indicating whether the call changed the timeout state</returns>
2337+
private bool OnTimeoutCore(int expectedState, int targetState)
2338+
{
2339+
Debug.Assert(targetState == TimeoutState.ExpiredAsync || targetState == TimeoutState.ExpiredSync, "OnTimeoutCore must have an expiry state as the targetState");
2340+
2341+
bool retval = false;
2342+
if (Interlocked.CompareExchange(ref _timeoutState, targetState, expectedState) == expectedState)
22672343
{
2268-
_internalTimeout = true;
2344+
retval = true;
22692345
// lock protects against Close and Cancel
22702346
lock (this)
22712347
{
@@ -2363,6 +2439,7 @@ private void OnTimeout(object state)
23632439
}
23642440
}
23652441
}
2442+
return retval;
23662443
}
23672444

23682445
internal void ReadSni(TaskCompletionSource<object> completion)
@@ -2397,19 +2474,32 @@ internal void ReadSni(TaskCompletionSource<object> completion)
23972474
{
23982475
Debug.Assert(completion != null, "Async on but null asyncResult passed");
23992476

2400-
if (_networkPacketTimeout == null)
2477+
// if the state is currently stopped then change it to running and allocate a new identity value from
2478+
// the identity source. The identity value is used to correlate timer callback events to the currently
2479+
// running timeout and prevents a late timer callback affecting a result it does not relate to
2480+
int previousTimeoutState = Interlocked.CompareExchange(ref _timeoutState, TimeoutState.Running, TimeoutState.Stopped);
2481+
if (previousTimeoutState == TimeoutState.Stopped)
24012482
{
2402-
_networkPacketTimeout = ADP.UnsafeCreateTimer(
2403-
new TimerCallback(OnTimeout),
2404-
null,
2405-
Timeout.Infinite,
2406-
Timeout.Infinite);
2483+
Debug.Assert(_timeoutIdentityValue == 0, "timer was previously stopped without resetting the _identityValue");
2484+
_timeoutIdentityValue = Interlocked.Increment(ref _timeoutIdentitySource);
24072485
}
24082486

2487+
_networkPacketTimeout?.Dispose();
2488+
2489+
_networkPacketTimeout = ADP.UnsafeCreateTimer(
2490+
new TimerCallback(OnTimeoutAsync),
2491+
new TimeoutState(_timeoutIdentityValue),
2492+
Timeout.Infinite,
2493+
Timeout.Infinite
2494+
);
2495+
2496+
24092497
// -1 == Infinite
24102498
// 0 == Already timed out (NOTE: To simulate the same behavior as sync we will only timeout on 0 if we receive an IO Pending from SNI)
24112499
// >0 == Actual timeout remaining
24122500
int msecsRemaining = GetTimeoutRemaining();
2501+
2502+
Debug.Assert(previousTimeoutState == TimeoutState.Stopped, "previous timeout state was not Stopped");
24132503
if (msecsRemaining > 0)
24142504
{
24152505
ChangeNetworkPacketTimeout(msecsRemaining, Timeout.Infinite);
@@ -2459,12 +2549,15 @@ internal void ReadSni(TaskCompletionSource<object> completion)
24592549
_networkPacketTaskSource.TrySetResult(null);
24602550
}
24612551
// Disable timeout timer on error
2552+
SetTimeoutStateStopped();
24622553
ChangeNetworkPacketTimeout(Timeout.Infinite, Timeout.Infinite);
24632554
}
24642555
else if (msecsRemaining == 0)
2465-
{ // Got IO Pending, but we have no time left to wait
2466-
// Immediately schedule the timeout timer to fire
2467-
ChangeNetworkPacketTimeout(0, Timeout.Infinite);
2556+
{
2557+
// Got IO Pending, but we have no time left to wait
2558+
// disable the timer and set the error state by calling OnTimeoutSync
2559+
ChangeNetworkPacketTimeout(Timeout.Infinite, Timeout.Infinite);
2560+
OnTimeoutSync();
24682561
}
24692562
// DO NOT HANDLE PENDING READ HERE - which is TdsEnums.SNI_SUCCESS_IO_PENDING state.
24702563
// That is handled by user who initiated async read, or by ReadNetworkPacket which is sync over async.
@@ -2578,13 +2671,13 @@ private void ReadSniError(TdsParserStateObject stateObj, uint error)
25782671
Debug.Assert(_syncOverAsync, "Should never reach here with async on!");
25792672
bool fail = false;
25802673

2581-
if (_internalTimeout)
2674+
if (IsTimeoutStateExpired)
25822675
{ // This is now our second timeout - time to give up.
25832676
fail = true;
25842677
}
25852678
else
25862679
{
2587-
stateObj._internalTimeout = true;
2680+
stateObj.SetTimeoutStateStopped();
25882681
Debug.Assert(_parser.Connection != null, "SqlConnectionInternalTds handler can not be null at this point.");
25892682
AddError(new SqlError(TdsEnums.TIMEOUT_EXPIRED, (byte)0x00, TdsEnums.MIN_ERROR_CLASS, _parser.Server, _parser.Connection.TimeoutErrorInternal.GetErrorMessage(), "", 0, TdsEnums.SNI_WAIT_TIMEOUT));
25902683

@@ -2807,6 +2900,25 @@ public void ReadAsyncCallback(IntPtr key, PacketHandle packet, uint error)
28072900

28082901
ChangeNetworkPacketTimeout(Timeout.Infinite, Timeout.Infinite);
28092902

2903+
// The timer thread may be unreliable under high contention scenarios. It cannot be
2904+
// assumed that the timeout has happened on the timer thread callback. Check the timeout
2905+
// synchrnously and then call OnTimeoutSync to force an atomic change of state.
2906+
if (TimeoutHasExpired)
2907+
{
2908+
OnTimeoutSync();
2909+
}
2910+
2911+
// try to change to the stopped state but only do so if currently in the running state
2912+
// and use cmpexch so that all changes out of the running state are atomic
2913+
int previousState = Interlocked.CompareExchange(ref _timeoutState, TimeoutState.Running, TimeoutState.Stopped);
2914+
2915+
// if the state is anything other than running then this query has reached an end so
2916+
// set the correlation _timeoutIdentityValue to 0 to prevent late callbacks executing
2917+
if (_timeoutState != TimeoutState.Running)
2918+
{
2919+
_timeoutIdentityValue = 0;
2920+
}
2921+
28102922
ProcessSniPacket(packet, error);
28112923
}
28122924
catch (Exception e)
@@ -3458,7 +3570,6 @@ internal void SendAttention(bool mustTakeWriteLock = false)
34583570
// Set _attentionSending to true before sending attention and reset after setting _attentionSent
34593571
// This prevents a race condition between receiving the attention ACK and setting _attentionSent
34603572
_attentionSending = true;
3461-
34623573
#if DEBUG
34633574
if (!_skipSendAttention)
34643575
{
@@ -3492,7 +3603,7 @@ internal void SendAttention(bool mustTakeWriteLock = false)
34923603
}
34933604
}
34943605
#if DEBUG
3495-
}
3606+
}
34963607
#endif
34973608

34983609
SetTimeoutSeconds(AttentionTimeoutSeconds); // Initialize new attention timeout of 5 seconds.
@@ -3827,7 +3938,7 @@ internal void AssertStateIsClean()
38273938
// Attention\Cancellation\Timeouts
38283939
Debug.Assert(!HasReceivedAttention && !_attentionSent && !_attentionSending, $"StateObj is still dealing with attention: Sent: {_attentionSent}, Received: {HasReceivedAttention}, Sending: {_attentionSending}");
38293940
Debug.Assert(!_cancelled, "StateObj still has cancellation set");
3830-
Debug.Assert(!_internalTimeout, "StateObj still has internal timeout set");
3941+
Debug.Assert(_timeoutState == TimeoutState.Stopped, "StateObj still has internal timeout set");
38313942
// Errors and Warnings
38323943
Debug.Assert(!_hasErrorOrWarning, "StateObj still has stored errors or warnings");
38333944
}

src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/SqlBulkCopy.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2961,7 +2961,7 @@ private void CleanUpStateObject(bool isCancelRequested = true)
29612961
{
29622962
_stateObj.CancelRequest();
29632963
}
2964-
_stateObj._internalTimeout = false;
2964+
_stateObj.SetTimeoutStateStopped();
29652965
_stateObj.CloseSession();
29662966
_stateObj._bulkCopyOpperationInProgress = false;
29672967
_stateObj._bulkCopyWriteTimeout = false;

src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/SqlDataReader.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1060,7 +1060,7 @@ private bool TryCloseInternal(bool closeReader)
10601060
{
10611061
_sharedState._dataReady = true; // set _sharedState._dataReady to not confuse CleanPartialRead
10621062
}
1063-
_stateObj._internalTimeout = false;
1063+
_stateObj.SetTimeoutStateStopped();
10641064
if (_sharedState._dataReady)
10651065
{
10661066
cleanDataFailed = true;

src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/TdsParser.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2155,7 +2155,7 @@ internal bool TryRun(RunBehavior runBehavior, SqlCommand cmdHandler, SqlDataRead
21552155
// If there is data ready, but we didn't exit the loop, then something is wrong
21562156
Debug.Assert(!dataReady, "dataReady not expected - did we forget to skip the row?");
21572157

2158-
if (stateObj._internalTimeout)
2158+
if (stateObj.IsTimeoutStateExpired)
21592159
{
21602160
runBehavior = RunBehavior.Attention;
21612161
}
@@ -2779,7 +2779,7 @@ internal bool TryRun(RunBehavior runBehavior, SqlCommand cmdHandler, SqlDataRead
27792779
stateObj._attentionSent = false;
27802780
stateObj._attentionReceived = false;
27812781

2782-
if (RunBehavior.Clean != (RunBehavior.Clean & runBehavior) && !stateObj._internalTimeout)
2782+
if (RunBehavior.Clean != (RunBehavior.Clean & runBehavior) && !stateObj.IsTimeoutStateExpired)
27832783
{
27842784
// Add attention error to collection - if not RunBehavior.Clean!
27852785
stateObj.AddError(new SqlError(0, 0, TdsEnums.MIN_ERROR_CLASS, _server, SQLMessage.OperationCancelled(), "", 0));

0 commit comments

Comments
 (0)