Skip to content

Commit c67efd8

Browse files
authored
[mono] retry mono_threads_pthread_kill if result == EAGAIN on Linux (#33966)
* [mono] retry mono_threads_pthread_kill if result == EAGAIN on Linux. Try to address #32377 (signal queue overflow) by sleeping and retrying a few times.
1 parent eff3797 commit c67efd8

File tree

1 file changed

+29
-5
lines changed

1 file changed

+29
-5
lines changed

src/mono/mono/utils/mono-threads-posix.c

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -170,16 +170,25 @@ mono_threads_pthread_kill (MonoThreadInfo *info, int signum)
170170
{
171171
THREADS_SUSPEND_DEBUG ("sending signal %d to %p[%p]\n", signum, info, mono_thread_info_get_tid (info));
172172

173+
const int signal_queue_ovf_retry_count G_GNUC_UNUSED = 5;
174+
const gulong signal_queue_ovf_sleep_us G_GNUC_UNUSED = 10 * 1000; /* 10 milliseconds */
175+
int retry_count G_GNUC_UNUSED = 0;
173176
int result;
174177

178+
#if defined (__linux__)
179+
redo:
180+
#endif
181+
175182
#ifdef USE_TKILL_ON_ANDROID
176-
int old_errno = errno;
183+
{
184+
int old_errno = errno;
177185

178-
result = tkill (info->native_handle, signum);
186+
result = tkill (info->native_handle, signum);
179187

180-
if (result < 0) {
181-
result = errno;
182-
mono_set_errno (old_errno);
188+
if (result < 0) {
189+
result = errno;
190+
mono_set_errno (old_errno);
191+
}
183192
}
184193
#elif defined (HAVE_PTHREAD_KILL)
185194
result = pthread_kill (mono_thread_info_get_tid (info), signum);
@@ -204,10 +213,25 @@ mono_threads_pthread_kill (MonoThreadInfo *info, int signum)
204213
result != ESRCH
205214
#if defined (__MACH__) && defined (ENOTSUP)
206215
&& result != ENOTSUP
216+
#endif
217+
#if defined (__linux__)
218+
&& !(result == EAGAIN && retry_count < signal_queue_ovf_retry_count)
207219
#endif
208220
)
209221
g_error ("%s: pthread_kill failed with error %d - potential kernel OOM or signal queue overflow", __func__, result);
210222

223+
#if defined (__linux__)
224+
if (result == EAGAIN && retry_count < signal_queue_ovf_retry_count) {
225+
/* HACK: if the signal queue overflows on linux, try again a couple of times.
226+
* Tries to address https://github.com/dotnet/runtime/issues/32377
227+
*/
228+
g_warning ("%s: pthread_kill failed with error %d - potential kernel OOM or signal queue overflow, sleeping for %ld microseconds", __func__, result, signal_queue_ovf_sleep_us);
229+
g_usleep (signal_queue_ovf_sleep_us);
230+
++retry_count;
231+
goto redo;
232+
}
233+
#endif
234+
211235
return result;
212236
}
213237

0 commit comments

Comments
 (0)