Skip to content

Commit 9c68d7c

Browse files
Introduce keepalives for ZmqClient and ZmqServer
Fixes issue: sonic-net/sonic-buildimage#23110

When a DPU is powered off and back on, the ZMQ client on the switch still holds a stale TCP connection. The first message sent after the DPU restarts is delivered over the dead connection, gets a TCP RST, and is silently lost. ZMQ then auto-reconnects, so subsequent messages succeed.

This patch enables:
1. TCP keepalive on ZmqClient PUSH sockets, to detect dead connections proactively (within ~8 seconds of the peer going down).
2. ZMQ_IMMEDIATE on ZmqClient PUSH sockets, to prevent queueing messages to peers whose underlying TCP connection is not yet completed.
3. TCP keepalive on ZmqServer PULL sockets, as defense-in-depth.

With these changes, after a DPU power-off:
- TCP keepalive probes will fail, causing ZMQ to tear down the stale connection and reconnect.
- ZMQ_IMMEDIATE prevents the first message from being queued to a peer with an incomplete connection, so it stays in the send queue until the reconnection completes.

Signed-off-by: Prabhat Aravind <[email protected]>
1 parent 1e0584c commit 9c68d7c

2 files changed

Lines changed: 38 additions & 0 deletions

File tree

common/zmqclient.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,30 @@ void ZmqClient::connect()
109109
// Increase send buffer for use all bandwidth: http://api.zeromq.org/4-2:zmq-setsockopt
110110
int high_watermark = MQ_WATERMARK;
111111
zmq_setsockopt(m_socket, ZMQ_SNDHWM, &high_watermark, sizeof(high_watermark));
112+
113+
// Enable TCP keepalive to detect dead connections (e.g. DPU power-off).
114+
// Without keepalive, a stale TCP connection persists indefinitely, and the
115+
// first message sent after peer restart is silently lost on the dead connection.
116+
// Keepalive probes will detect the dead peer within ~8s (5s idle + 3x1s probes),
117+
// triggering ZMQ to reconnect before any new data is sent.
118+
int keepalive = 1;
119+
zmq_setsockopt(m_socket, ZMQ_TCP_KEEPALIVE, &keepalive, sizeof(keepalive));
120+
121+
int keepalive_idle = 5; // seconds before first probe
122+
zmq_setsockopt(m_socket, ZMQ_TCP_KEEPALIVE_IDLE, &keepalive_idle, sizeof(keepalive_idle));
123+
124+
int keepalive_intvl = 1; // seconds between probes
125+
zmq_setsockopt(m_socket, ZMQ_TCP_KEEPALIVE_INTVL, &keepalive_intvl, sizeof(keepalive_intvl));
126+
127+
int keepalive_cnt = 3; // number of failed probes before connection is considered dead
128+
zmq_setsockopt(m_socket, ZMQ_TCP_KEEPALIVE_CNT, &keepalive_cnt, sizeof(keepalive_cnt));
129+
130+
// ZMQ_IMMEDIATE: only queue messages to completed connections.
131+
// Prevents the first message from being sent over a not-yet-reconnected
132+
// peer, which would result in silent message loss.
133+
int immediate = 1;
134+
zmq_setsockopt(m_socket, ZMQ_IMMEDIATE, &immediate, sizeof(immediate));
135+
112136
}
113137

114138
if (!m_vrf.empty())

common/zmqserver.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,20 @@ void ZmqServer::bind()
9090
zmq_setsockopt(m_socket, ZMQ_RCVHWM, &high_watermark, sizeof(high_watermark));
9191
}
9292

93+
// Enable TCP keepalive on the server socket as defense-in-depth.
94+
// This allows the server to detect and clean up stale client connections.
95+
int keepalive = 1;
96+
zmq_setsockopt(m_socket, ZMQ_TCP_KEEPALIVE, &keepalive, sizeof(keepalive));
97+
98+
int keepalive_idle = 5;
99+
zmq_setsockopt(m_socket, ZMQ_TCP_KEEPALIVE_IDLE, &keepalive_idle, sizeof(keepalive_idle));
100+
101+
int keepalive_intvl = 1;
102+
zmq_setsockopt(m_socket, ZMQ_TCP_KEEPALIVE_INTVL, &keepalive_intvl, sizeof(keepalive_intvl));
103+
104+
int keepalive_cnt = 3;
105+
zmq_setsockopt(m_socket, ZMQ_TCP_KEEPALIVE_CNT, &keepalive_cnt, sizeof(keepalive_cnt));
106+
93107
if (!m_vrf.empty())
94108
{
95109
zmq_setsockopt(m_socket, ZMQ_BINDTODEVICE, m_vrf.c_str(), m_vrf.length());

0 commit comments

Comments
 (0)