Skip to content

Commit 8d89ca9

Browse files
committed
Reliably force restart when a subprocess has a fatal error.
Suppose a system call such as bind() fails in the sockd subprocess in request_sockd_fd(). sockd will call suicide(). This will send a SIGCHLD to the master process, which the master process should respond to by calling suicide(), forcing a process supervisor to respawn the entire ndhc program. But this doesn't reliably happen prior to this commit because of the interaction between request_sockd_fd() and signalfd() [or, equivalently, self-pipe-trick] signal handling. request_sockd_fd() makes ndhc-master synchronously wait for a response from sockd via safe_recvmsg(). The normal goto-like signal handling path is suppressed when using signalfd(), so when SIGCHLD is received, it will not be handled until I/O is dispatched for the signalfd or pipe. But such code will never be reached, because ndhc-master is waiting in safe_recvmsg() and thus never polls the signalfd's status. So, revert to using traditional POSIX sigaction() for SIGCHLD, which provides exactly the required behavior for proper functioning.
1 parent f0340b1 commit 8d89ca9

1 file changed

Lines changed: 26 additions & 3 deletions

File tree

src/ndhc.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,17 +158,42 @@ void show_usage(void)
158158
exit(EXIT_SUCCESS);
159159
}
160160

161+
/*
 * Asynchronous signal handler installed with sigaction().
 *
 * signo: number of the signal being delivered.
 *
 * SIGCHLD: writes a fixed diagnostic to standard output and terminates the
 * process with EXIT_FAILURE.  Any other signal is ignored.
 *
 * This runs in signal context, so it must restrict itself to
 * async-signal-safe operations: no stdio, no atexit handlers.
 */
static void signal_handler(int signo)
{
    switch (signo) {
    case SIGCHLD: {
        /* Written raw rather than through the logger, so the message must
         * carry its own line terminator. */
        static const char errstr[] = "ndhc-master: Subprocess terminated unexpectedly. Exiting.\n";
        safe_write(STDOUT_FILENO, errstr, sizeof errstr - 1);
        /* _exit() rather than exit(): exit() runs atexit handlers and
         * flushes stdio buffers, neither of which is async-signal-safe and
         * either of which could deadlock or corrupt state if the signal
         * interrupted the same machinery in the main flow. */
        _exit(EXIT_FAILURE);
    }
    default:
        break;
    }
}
173+
161174
static void setup_signals_ndhc(void)
162175
{
163176
sigset_t mask;
164177
sigemptyset(&mask);
165178
sigaddset(&mask, SIGUSR1);
166179
sigaddset(&mask, SIGUSR2);
167-
sigaddset(&mask, SIGCHLD);
168180
sigaddset(&mask, SIGTERM);
169181
sigaddset(&mask, SIGINT);
170182
if (sigprocmask(SIG_BLOCK, &mask, (sigset_t *)0) < 0)
171183
suicide("sigprocmask failed");
184+
185+
sigemptyset(&mask);
186+
sigaddset(&mask, SIGCHLD);
187+
if (sigprocmask(SIG_UNBLOCK, &mask, (sigset_t *)0) < 0)
188+
suicide("sigprocmask failed");
189+
struct sigaction sa = {
190+
.sa_handler = signal_handler,
191+
.sa_flags = SA_RESTART,
192+
};
193+
sigemptyset(&sa.sa_mask);
194+
if (sigaction(SIGCHLD, &sa, NULL))
195+
suicide("sigaction failed");
196+
172197
if (cs.signalFd >= 0) {
173198
epoll_del(cs.epollFd, cs.signalFd);
174199
close(cs.signalFd);
@@ -197,8 +222,6 @@ static int signal_dispatch(void)
197222
switch (si.ssi_signo) {
198223
case SIGUSR1: return SIGNAL_RENEW;
199224
case SIGUSR2: return SIGNAL_RELEASE;
200-
case SIGCHLD:
201-
suicide("ndhc-master: Subprocess terminated unexpectedly. Exiting.");
202225
case SIGTERM:
203226
log_line("Received SIGTERM. Exiting gracefully.");
204227
exit(EXIT_SUCCESS);

0 commit comments

Comments
 (0)