From b6d0c75246d6fca8489cb62203e47ce6ae4d8797 Mon Sep 17 00:00:00 2001 From: Aleksey Kliger Date: Mon, 23 Mar 2020 11:21:18 -0400 Subject: [PATCH 1/3] [mono] retry mono_threads_pthread_kill if result == EAGAIN on Linux Try to address https://github.com/dotnet/runtime/issues/32377 (signal queue overflow) by sleeping and retrying a few times. --- src/mono/mono/utils/mono-threads-posix.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/mono/mono/utils/mono-threads-posix.c b/src/mono/mono/utils/mono-threads-posix.c index cd32e6b042d256..05cf8df015877b 100644 --- a/src/mono/mono/utils/mono-threads-posix.c +++ b/src/mono/mono/utils/mono-threads-posix.c @@ -170,8 +170,12 @@ mono_threads_pthread_kill (MonoThreadInfo *info, int signum) { THREADS_SUSPEND_DEBUG ("sending signal %d to %p[%p]\n", signum, info, mono_thread_info_get_tid (info)); + const int signal_queue_ovf_retry_count = 5; + const gulong signal_queue_ovf_sleep_us = 10 * 1000; /* 10 milliseconds */ + int retry_count = 0; int result; +redo: #ifdef USE_TKILL_ON_ANDROID int old_errno = errno; @@ -204,10 +208,22 @@ mono_threads_pthread_kill (MonoThreadInfo *info, int signum) result != ESRCH #if defined (__MACH__) && defined (ENOTSUP) && result != ENOTSUP +#endif +#if defined (__linux__) + && result != EAGAIN #endif ) g_error ("%s: pthread_kill failed with error %d - potential kernel OOM or signal queue overflow", __func__, result); + if (result == EAGAIN && retry_count < signal_queue_ovf_retry_count) { + /* HACK: if the signal queue overflows on linux, try again a couple of times. + * Tries to address https://github.com/dotnet/runtime/issues/32377 + */ + g_usleep (signal_queue_ovf_sleep_us); + ++retry_count; + goto redo; + } + return result; } From 1843da0b312f377aa612f0981301ef6d5aa83d41 Mon Sep 17 00:00:00 2001 From: Aleksey Kliger Date: Mon, 23 Mar 2020 12:47:23 -0400 Subject: [PATCH 2/3] fix warnings; fix Android build failure; print warning if sleeping --- src/mono/mono/utils/mono-threads-posix.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/mono/mono/utils/mono-threads-posix.c b/src/mono/mono/utils/mono-threads-posix.c index 05cf8df015877b..86d8a7d8de697f 100644 --- a/src/mono/mono/utils/mono-threads-posix.c +++ b/src/mono/mono/utils/mono-threads-posix.c @@ -170,20 +170,25 @@ mono_threads_pthread_kill (MonoThreadInfo *info, int signum) { THREADS_SUSPEND_DEBUG ("sending signal %d to %p[%p]\n", signum, info, mono_thread_info_get_tid (info)); - const int signal_queue_ovf_retry_count = 5; - const gulong signal_queue_ovf_sleep_us = 10 * 1000; /* 10 milliseconds */ - int retry_count = 0; + const int signal_queue_ovf_retry_count G_GNUC_UNUSED = 5; + const gulong signal_queue_ovf_sleep_us G_GNUC_UNUSED = 10 * 1000; /* 10 milliseconds */ + int retry_count G_GNUC_UNUSED = 0; int result; +#if defined (__linux__) redo: +#endif + #ifdef USE_TKILL_ON_ANDROID - int old_errno = errno; + { + int old_errno = errno; - result = tkill (info->native_handle, signum); + result = tkill (info->native_handle, signum); - if (result < 0) { - result = errno; - mono_set_errno (old_errno); + if (result < 0) { + result = errno; + mono_set_errno (old_errno); + } } #elif defined (HAVE_PTHREAD_KILL) result = pthread_kill (mono_thread_info_get_tid (info), signum); @@ -215,14 +220,17 @@ mono_threads_pthread_kill (MonoThreadInfo *info, int signum) ) g_error ("%s: pthread_kill failed with error %d - potential kernel OOM or signal queue overflow", __func__, result); +#if defined (__linux__) if (result == EAGAIN && retry_count < signal_queue_ovf_retry_count) { /* HACK: if the signal queue overflows on linux, try again a couple of times. * Tries to address https://github.com/dotnet/runtime/issues/32377 */ + g_warning ("%s: pthread_kill failed with error %d - potential kernel OOM or signal queue overflow, sleeping for %ld microseconds", __func__, result, signal_queue_ovf_sleep_us); g_usleep (signal_queue_ovf_sleep_us); ++retry_count; goto redo; } +#endif return result; } From 9757e2ba3447c2acf9eae1b568f0064764a249dd Mon Sep 17 00:00:00 2001 From: Aleksey Kliger Date: Mon, 23 Mar 2020 13:51:23 -0400 Subject: [PATCH 3/3] if the retry count is exceeded, error out --- src/mono/mono/utils/mono-threads-posix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mono/mono/utils/mono-threads-posix.c b/src/mono/mono/utils/mono-threads-posix.c index 86d8a7d8de697f..33afc458a79283 100644 --- a/src/mono/mono/utils/mono-threads-posix.c +++ b/src/mono/mono/utils/mono-threads-posix.c @@ -215,7 +215,7 @@ mono_threads_pthread_kill (MonoThreadInfo *info, int signum) && result != ENOTSUP #endif #if defined (__linux__) - && result != EAGAIN + && !(result == EAGAIN && retry_count < signal_queue_ovf_retry_count) #endif ) g_error ("%s: pthread_kill failed with error %d - potential kernel OOM or signal queue overflow", __func__, result);