From b1a6ca9241fa808424ba3c11ae480fd445c02e0f Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Tue, 3 Aug 2021 07:33:31 +0000 Subject: [PATCH 1/4] Add function to disable paddle signal handler Paddle uses google::InstallFaultSignalHandler to handle selected system signals, mainly for debugging and bug-reporting purposes. However, this can conflict with other Python packages that capture the same signals. Such packages include TVM and others. To resolve this issue, we add a function to disable the signal handler. --- paddle/fluid/platform/init.cc | 28 ++++++++--- paddle/fluid/platform/init.h | 2 + paddle/fluid/pybind/pybind.cc | 2 + python/paddle/__init__.py | 2 + python/paddle/fluid/framework.py | 2 + .../unittests/test_disable_signal_handler.py | 48 +++++++++++++++++++ 6 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_disable_signal_handler.py diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index 3ee5a578601045..afae0465311433 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include #include #include @@ -245,15 +246,16 @@ void InitDevices(const std::vector devices) { // Description Quoted from // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/signal.h.html const struct { + int signal_number; const char *name; const char *error_string; } SignalErrorStrings[] = { - {"SIGSEGV", "Segmentation fault"}, - {"SIGILL", "Illegal instruction"}, - {"SIGFPE", "Erroneous arithmetic operation"}, - {"SIGABRT", "Process abort signal"}, - {"SIGBUS", "Access to an undefined portion of a memory object"}, - {"SIGTERM", "Termination signal"}, + {SIGSEGV, "SIGSEGV", "Segmentation fault"}, + {SIGILL, "SIGILL", "Illegal instruction"}, + {SIGFPE, "SIGFPE", "Erroneous arithmetic operation"}, + {SIGABRT, "SIGABRT", "Process abort signal"}, + {SIGBUS, "SIGBUS", "Access to an undefined portion of a memory object"}, + {SIGTERM, "SIGTERM", "Termination signal"}, }; bool StartsWith(const char *str, const char *prefix) { @@ -319,7 +321,21 @@ void SignalHandle(const char *data, int size) { // will Kill program by the default signal handler } } +#endif // _WIN32 + +void DisableSignalHandler() { +#ifndef _WIN32 + for (size_t i = 0; + i < (sizeof(SignalErrorStrings) / sizeof(*(SignalErrorStrings))); ++i) { + int signal_number = SignalErrorStrings[i].signal_number; + struct sigaction sig_action; + memset(&sig_action, 0, sizeof(sig_action)); + sigemptyset(&sig_action.sa_mask); + sig_action.sa_handler = SIG_DFL; + sigaction(signal_number, &sig_action, NULL); + } #endif +} #ifdef WITH_WIN_DUMP_DBG typedef BOOL(WINAPI *MINIDUMP_WRITE_DUMP)( diff --git a/paddle/fluid/platform/init.h b/paddle/fluid/platform/init.h index cd5ef843fa8f7d..b52456b19ac662 100644 --- a/paddle/fluid/platform/init.h +++ b/paddle/fluid/platform/init.h @@ -61,5 +61,7 @@ class SignalMessageDumper { void SignalHandle(const char* data, int size); #endif +void DisableSignalHandler(); + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/pybind/pybind.cc 
b/paddle/fluid/pybind/pybind.cc index 7137115ac0a396..8f35eb42d4bae2 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -506,6 +506,8 @@ PYBIND11_MODULE(core_noavx, m) { m.def("set_num_threads", &platform::SetNumThreads); + m.def("disable_signal_handler", &DisableSignalHandler); + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) m.def("cudnn_version", &platform::CudnnVersion); #endif diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index f72fb6c1806b10..97b6d8695b3d72 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -271,6 +271,7 @@ from .device import get_device # noqa: F401 from .fluid.framework import is_compiled_with_cuda # noqa: F401 from .fluid.framework import is_compiled_with_rocm # noqa: F401 +from .fluid.framework import disable_signal_handler # noqa: F401 from .device import is_compiled_with_xpu # noqa: F401 from .device import is_compiled_with_npu # noqa: F401 from .device import XPUPlace # noqa: F401 @@ -483,6 +484,7 @@ 'enable_static', 'scatter_nd', 'set_default_dtype', + 'disable_signal_handler', 'expand_as', 'stack', 'sqrt', diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 10b7292a0b6bb5..4f2f7104af8f8c 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -35,6 +35,7 @@ from .proto import framework_pb2 from . import core +from .core import disable_signal_handler from . 
import unique_name import paddle.version as fluid_version import warnings @@ -60,6 +61,7 @@ 'device_guard', 'set_flags', 'get_flags', + 'disable_signal_handler', ] EMPTY_VAR_NAME = core.kEmptyVarName() diff --git a/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py b/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py new file mode 100644 index 00000000000000..357d080338a90d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py @@ -0,0 +1,48 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +import signal, os +import paddle +import subprocess + +SignalsToTest = { + signal.SIGTERM, signal.SIGBUS, signal.SIGABRT, signal.SIGSEGV, + signal.SIGILL, signal.SIGFPE +} + + +class TestSignOpError(unittest.TestCase): + def test_errors(self): + try: + for sig in SignalsToTest: + output = subprocess.check_output( + [ + "python", "-c", + f"import paddle; import signal,os; paddle.disable_signal_handler(); os.kill(os.getpid(), {sig})" + ], + stderr=subprocess.STDOUT) + except Exception as e: + # If paddle signal handler is enabled + # One would expect "paddle::framework::SignalHandle" in STDERR + stdout_message = str(e.output) + if "paddle::framework::SignalHandle" in stdout_message: + raise Exception("Paddle signal handler not disabled") + + +if __name__ == "__main__": + unittest.main() From dbaa63e943f9283b72bcf9f17b1f56c081389944 Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Thu, 5 Aug 2021 02:56:48 +0000 Subject: [PATCH 2/4] Remove signal test from WIN32 platform --- python/paddle/fluid/framework.py | 20 ++++++++++++++++++- .../fluid/tests/unittests/CMakeLists.txt | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 4f2f7104af8f8c..1d9ea990d4cd2f 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -35,7 +35,6 @@ from .proto import framework_pb2 from . import core -from .core import disable_signal_handler from . import unique_name import paddle.version as fluid_version import warnings @@ -397,6 +396,25 @@ def is_compiled_with_xpu(): return core.is_compiled_with_xpu() +def disable_signal_handler(): + """ + Reset signal handler registered by Paddle. + + Paddle installs signal handlers at C++ level to log debug information upon failing. + However, conflicts can happen if another python module is making use of such signal. 
+ Such being the case, one may disable paddle signal handler via this interface. + + Returns: None + + Examples: + .. code-block:: python + + import paddle + paddle.disable_signal_handler() + """ + return core.disable_signal_handler() + + def is_compiled_with_cuda(): """ Whether this whl package can be used to run the model on GPU. diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 9d8b5fb699e33a..2f6044e1047aca 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -111,6 +111,7 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32) LIST(REMOVE_ITEM TEST_OPS test_memcpy_op) LIST(REMOVE_ITEM TEST_OPS test_raw_program_optimizer) LIST(REMOVE_ITEM TEST_OPS test_fleet_gradient_scale) + LIST(REMOVE_ITEM TEST_OPS test_disable_signal_handler) endif() if(WIN32) From 4278997992ea9e0d76fbec19b9819aad4615d30b Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Thu, 5 Aug 2021 10:38:36 +0000 Subject: [PATCH 3/4] Remove redundant return from disable_signal_handler() function --- python/paddle/fluid/framework.py | 2 +- .../paddle/fluid/tests/unittests/test_disable_signal_handler.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 1d9ea990d4cd2f..220f850419f5d0 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -412,7 +412,7 @@ def disable_signal_handler(): import paddle paddle.disable_signal_handler() """ - return core.disable_signal_handler() + core.disable_signal_handler() def is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py b/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py index 357d080338a90d..dbe9dcb7f823d7 100644 --- a/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py +++ 
b/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From c5a8cae7c5066f21219605353a2cb66f63c1db28 Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Fri, 13 Aug 2021 03:38:03 +0000 Subject: [PATCH 4/4] Add detailed messages to en_doc --- python/paddle/fluid/framework.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 220f850419f5d0..0c2aafc996e2f7 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -60,7 +60,6 @@ 'device_guard', 'set_flags', 'get_flags', - 'disable_signal_handler', ] EMPTY_VAR_NAME = core.kEmptyVarName() @@ -403,6 +402,12 @@ def disable_signal_handler(): Paddle installs signal handlers at C++ level to log debug information upon failing. However, conflicts can happen if another python module is making use of such signal. Such being the case, one may disable paddle signal handler via this interface. + + Known frameworks that require disabling signal handler include: + 1. TVM + 2. ADLIK + + Make sure you call paddle.disable_signal_handler() before using the above-mentioned frameworks. Returns: None