From ddcd122dc7f16d4fc17346adf14c6810dbea045d Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Fri, 11 Aug 2023 10:36:42 +0200 Subject: [PATCH 1/4] Apply env(ICECC_SLOW_NETWORK) also to setsockopt(). --- services/comm.cpp | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/services/comm.cpp b/services/comm.cpp index c43732d5..bfae08dd 100644 --- a/services/comm.cpp +++ b/services/comm.cpp @@ -339,10 +339,22 @@ void MsgChannel::writefull(const void *_buf, size_t count) msgtogo += count; } +static bool slow_network() +{ + static bool retval = false; + static bool cached = false; + if (!cached) { + if (const char *icecc_slow_network = getenv("ICECC_SLOW_NETWORK")) + if (icecc_slow_network[0] == '1') + retval = true; + cached = true; + } + return retval; +} + static size_t get_max_write_size() { - if( const char* icecc_slow_network = getenv( "ICECC_SLOW_NETWORK" )) - if( icecc_slow_network[ 0 ] == '1' ) + if (slow_network()) return MAX_SLOW_WRITE_SIZE; return MAX_MSG_SIZE; } @@ -946,7 +958,7 @@ MsgChannel::MsgChannel(int _fd, struct sockaddr *_a, socklen_t _l, bool text) int on = 1; - if (!setsockopt(_fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &on, sizeof(on))) { + if (!setsockopt(_fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &on, sizeof(on)) && !slow_network()) { #if defined( TCP_KEEPIDLE ) || defined( TCPCTL_KEEPIDLE ) #if defined( TCP_KEEPIDLE ) int keepidle = TCP_KEEPIDLE; @@ -977,8 +989,10 @@ MsgChannel::MsgChannel(int _fd, struct sockaddr *_a, socklen_t _l, bool text) } #ifdef TCP_USER_TIMEOUT - int timeout = 3 * 3 * 1000; // matches the timeout part of keepalive above, in milliseconds - setsockopt(_fd, IPPROTO_TCP, TCP_USER_TIMEOUT, (char *) &timeout, sizeof(timeout)); + if (!slow_network()) { + int timeout = 3 * 3 * 1000; // matches the timeout part of keepalive above, in milliseconds + setsockopt(_fd, IPPROTO_TCP, TCP_USER_TIMEOUT, (char *) &timeout, sizeof(timeout)); + } #endif if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) { From 7bb901e6ccbc1bf6bdb929e38a88261fdc37d7b5 Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Fri, 11 Aug 2023 14:05:15 +0200 Subject: [PATCH 2/4] Fix "timed out while trying to send data". --- services/comm.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/services/comm.cpp b/services/comm.cpp index bfae08dd..4ba00da4 100644 --- a/services/comm.cpp +++ b/services/comm.cpp @@ -354,9 +354,12 @@ static bool slow_network() static size_t get_max_write_size() { - if (slow_network()) - return MAX_SLOW_WRITE_SIZE; - return MAX_MSG_SIZE; + return slow_network() ? MAX_SLOW_WRITE_SIZE : MAX_MSG_SIZE; +} + +static size_t get_write_timeout_secs() +{ + return slow_network() ? 60 * 60 : 30; } bool MsgChannel::flush_writebuf(bool blocking) @@ -393,7 +396,7 @@ bool MsgChannel::flush_writebuf(bool blocking) pollfd pfd; pfd.fd = fd; pfd.events = POLLOUT; - ready = poll(&pfd, 1, 30 * 1000); + ready = poll(&pfd, 1, get_write_timeout_secs() * 1000); if (ready < 0 && errno == EINTR) { continue; @@ -407,7 +410,7 @@ bool MsgChannel::flush_writebuf(bool blocking) continue; } if (ready == 0) { - log_error() << "timed out while trying to send data" << endl; + log_error() << "timed out (" << get_write_timeout_secs() << " seconds) while trying to send data" << endl; } /* Timeout or real error --> error. */ From 258cad9d5722c0996f3966ef5644f0a5ba095899 Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Sun, 13 Aug 2023 10:37:59 +0200 Subject: [PATCH 3/4] Fix "busy installing for a long time - removing". --- services/comm.cpp | 6 +++--- services/comm.h | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/services/comm.cpp b/services/comm.cpp index 4ba00da4..c323871c 100644 --- a/services/comm.cpp +++ b/services/comm.cpp @@ -339,7 +339,7 @@ void MsgChannel::writefull(const void *_buf, size_t count) msgtogo += count; } -static bool slow_network() +bool MsgChannel::slow_network() { static bool retval = false; static bool cached = false; @@ -354,12 +354,12 @@ static bool slow_network() static size_t get_max_write_size() { - return slow_network() ? MAX_SLOW_WRITE_SIZE : MAX_MSG_SIZE; + return MsgChannel::slow_network() ? MAX_SLOW_WRITE_SIZE : MAX_MSG_SIZE; } static size_t get_write_timeout_secs() { - return slow_network() ? 60 * 60 : 30; + return MsgChannel::slow_network() ? 60 * 60 : 30; } bool MsgChannel::flush_writebuf(bool blocking) diff --git a/services/comm.h b/services/comm.h index 0727251d..9e26987e 100644 --- a/services/comm.h +++ b/services/comm.h @@ -40,11 +40,11 @@ // if you increase the MIN_PROTOCOL_VERSION, comment out macros below and clean up the code #define MIN_PROTOCOL_VERSION 21 -#define MAX_SCHEDULER_PONG 3 +#define MAX_SCHEDULER_PONG (!MsgChannel::slow_network() ? 3 : 300) // MAX_SCHEDULER_PING must be multiple of MAX_SCHEDULER_PONG #define MAX_SCHEDULER_PING 12 * MAX_SCHEDULER_PONG // maximum amount of time in seconds a daemon can be busy installing -#define MAX_BUSY_INSTALLING 120 +#define MAX_BUSY_INSTALLING (!MsgChannel::slow_network() ? 120 : 10 * 60 * 60) // comparison for protocol version checks #define IS_PROTOCOL_VERSION(x, c) ((c)->protocol >= (x)) @@ -284,6 +284,8 @@ class MsgChannel MsgChannel &operator<<(const std::string &); MsgChannel &operator<<(const std::list &); + static bool slow_network(); + // our filedesc int fd; From 2fec66c6e159495321b0f135662d857d00f4aa97 Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Sun, 13 Aug 2023 11:06:39 +0200 Subject: [PATCH 4/4] Fix too slow connections to the scheduler. --- daemon/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/daemon/main.cpp b/daemon/main.cpp index 2b9082f3..d038eb90 100644 --- a/daemon/main.cpp +++ b/daemon/main.cpp @@ -2017,11 +2017,11 @@ void Daemon::answer_client_requests() pfd.fd = scheduler->fd; pfd.events = POLLIN; pollfds.push_back(pfd); - } else if (discover && discover->listen_fd() >= 0) { + } else if (discover && (discover->listen_fd() >= 0 || discover->connect_fd() >= 0)) { /* We don't explicitely check for discover->get_fd() being in the selected set below. If it's set, we simply will return and our call will make sure we try to get the scheduler. */ - pfd.fd = discover->listen_fd(); + pfd.fd = discover->listen_fd() >= 0 ? discover->listen_fd() : discover->connect_fd(); pfd.events = POLLIN; pollfds.push_back(pfd); }