From 356ed1bdeda33bb2bf32d63dbfec4cc464f5b995 Mon Sep 17 00:00:00 2001 From: Sonic Build Admin Date: Wed, 11 Mar 2026 02:26:33 +0000 Subject: [PATCH] Add lldpd patch to fix incomplete interface detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #### Why I did it On the 5640 full-topology testbed, 457 interfaces (456 Ethernet + eth0) come up during system boot while lldpd is initializing at the same time. During initialization, lldpd sends an RTM_GETLINK dump request to enumerate all interfaces, but new interfaces keep coming up during this period, and lldpd also subscribes to asynchronous netlink update notifications (`levent_iface_subscribe`). Because queries and change notifications share the same socket (`cfg->g_netlink->nl_socket`), the processing of the earlier `RTM_GETLINK` dump (`netlink_recv` for RTM_GETLINK) is disturbed by the handling of new RTM_NEWLINK messages (`netlink_change_cb`). As a result, 200+ interfaces are missing from the lldp neighbor table. Only the 200+ interfaces whose RTM_NEWLINK messages arrived after lldpd initialization are present. - Phenomenon: 1. Incorrect Chassis ID - The Chassis ID is incorrect: it should be the MAC address, but the hostname is shown instead. a. lldpd fails to find eth0 and falls back to the hostname. WARNING lldp#lldpcli[29]: cannot find port eth0 2. lldpcli config failure - The port is already up, but lldp later cannot find it, so the port-up event is missed and lldp is never able to recover. The symptom is that both sides are missing lldp entries. a. <11>2026-02-05T04:18:42.052245+00:00 ATL21-0101-0014-12BT0 ERR lldp#lldpmgrd[38]: Command failed '['lldpcli', 'configure', 'ports', 'Ethernet501', 'lldp', 'portidsubtype', 'local', 'etp63f', 'description', 'ATL210101580129:A1.PORT8']': 2026-02-05T04:18:42 [WARN/lldpctl] cannot find port Ethernet501#012 - command was failed 6 times, disabling retry ##### Work item tracking - Microsoft ADO **36610002**: #### How I did it SONiC currently uses lldpd 1.0.16. 
There is a known issue in lldpd community: In some cases lldpd cannot get all interfaces · Issue #611 · lldpd/lldpd And it's fixed, but no tag for 1.0.16 :daemon/netlink: use a different socket for changes and queries · lldpd/lldpd@88fe3fa Add this commit as a new patch for sonic to fix this issue. #### How to verify it config reload can repro this issue easily on 5640 full topology testbed. Try run "config reload" and verify if all lldp neighbors are up. #### Which release branch to backport (provide reason below if selected) - [ ] 202305 - [ ] 202311 - [ ] 202405 - [ ] 202411 - [ ] 202505 - [x] 202511 #### Tested branch (Please provide the tested image version) - [ ] - [ ] #### Description for the changelog #### Link to config_db schema for YANG module changes Signed-off-by: Sonic Build Admin #### A picture of a cute animal (not mandatory but encouraged) --- ...erent-socket-for-changes-and-queries.patch | 191 ++++++++++++++++++ src/lldpd/patch/series | 3 +- 2 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 src/lldpd/patch/0002-use-a-different-socket-for-changes-and-queries.patch diff --git a/src/lldpd/patch/0002-use-a-different-socket-for-changes-and-queries.patch b/src/lldpd/patch/0002-use-a-different-socket-for-changes-and-queries.patch new file mode 100644 index 00000000000..139b26e5280 --- /dev/null +++ b/src/lldpd/patch/0002-use-a-different-socket-for-changes-and-queries.patch @@ -0,0 +1,191 @@ +diff --git a/src/daemon/netlink.c b/src/daemon/netlink.c +index 55f9b66..e7db1fc 100644 +--- a/src/daemon/netlink.c ++++ b/src/daemon/netlink.c +@@ -38,7 +38,8 @@ struct netlink_req { + }; + + struct lldpd_netlink { +- int nl_socket; ++ int nl_socket_queries; ++ int nl_socket_changes; + int nl_socket_recv_size; + /* Cache */ + struct interfaces_device_list *devices; +@@ -94,34 +95,35 @@ netlink_socket_set_buffer_size(int s, int optname, const char *optname_str, int + * @return 0 on success, -1 otherwise + */ + static int +-netlink_connect(struct 
lldpd *cfg, int protocol, unsigned groups) ++netlink_connect(struct lldpd *cfg, unsigned groups) + { +- int s; ++ int s1 = -1, s2 = -1; + struct sockaddr_nl local = { .nl_family = AF_NETLINK, + .nl_pid = 0, + .nl_groups = groups }; + +- /* Open Netlink socket */ +- log_debug("netlink", "opening netlink socket"); +- s = socket(AF_NETLINK, SOCK_RAW, protocol); +- if (s == -1) { +- log_warn("netlink", "unable to open netlink socket"); +- return -1; ++ /* Open Netlink socket for subscriptions */ ++ log_debug("netlink", "opening netlink sockets"); ++ s1 = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); ++ if (s1 == -1) { ++ log_warn("netlink", "unable to open netlink socket for changes"); ++ goto error; + } + if (NETLINK_SEND_BUFSIZE && +- netlink_socket_set_buffer_size(s, SO_SNDBUF, "SO_SNDBUF", ++ netlink_socket_set_buffer_size(s1, SO_SNDBUF, "SO_SNDBUF", + NETLINK_SEND_BUFSIZE) == -1) { +- close(s); +- return -1; ++ log_warn("netlink", "unable to set send buffer size"); ++ goto error; + } + +- int rc = netlink_socket_set_buffer_size(s, SO_RCVBUF, "SO_RCVBUF", ++ int rc = netlink_socket_set_buffer_size(s1, SO_RCVBUF, "SO_RCVBUF", + NETLINK_RECEIVE_BUFSIZE); + switch (rc) { + case -1: +- close(s); +- return -1; ++ log_warn("netlink", "unable to set receiver buffer size"); ++ goto error; + case -2: ++ /* Cannot set size */ + cfg->g_netlink->nl_socket_recv_size = 0; + break; + default: +@@ -129,13 +131,24 @@ netlink_connect(struct lldpd *cfg, int protocol, unsigned groups) + break; + } + if (groups && +- bind(s, (struct sockaddr *)&local, sizeof(struct sockaddr_nl)) < 0) { ++ bind(s1, (struct sockaddr *)&local, sizeof(struct sockaddr_nl)) < 0) { + log_warn("netlink", "unable to bind netlink socket"); +- close(s); +- return -1; ++ goto error; + } +- cfg->g_netlink->nl_socket = s; ++ ++ /* Opening Netlink socket to for queries */ ++ s2 = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); ++ if (s2 == -1) { ++ log_warn("netlink", "unable to open netlink socket for queries"); ++ goto 
error; ++ } ++ cfg->g_netlink->nl_socket_changes = s1; ++ cfg->g_netlink->nl_socket_queries = s2; + return 0; ++error: ++ if (s1 != -1) close(s1); ++ if (s2 != -1) close(s2); ++ return -1; + } + + /** +@@ -525,13 +538,12 @@ netlink_merge(struct interfaces_device *old, struct interfaces_device *new) + * @return 0 on success, -1 on error + */ + static int +-netlink_recv(struct lldpd *cfg, struct interfaces_device_list *ifs, ++netlink_recv(struct lldpd *cfg, int s, struct interfaces_device_list *ifs, + struct interfaces_address_list *ifas) + { + int end = 0, ret = 0, flags, retry = 0; + struct iovec iov; + int link_update = 0; +- int s = cfg->g_netlink->nl_socket; + + struct interfaces_device *ifdold; + struct interfaces_device *ifdnew; +@@ -570,8 +582,10 @@ netlink_recv(struct lldpd *cfg, struct interfaces_device_list *ifs, + } + int rsize = cfg->g_netlink->nl_socket_recv_size; + if (errno == ENOBUFS && rsize > 0 && +- rsize < NETLINK_MAX_RECEIVE_BUFSIZE) { +- /* Try to increase buffer size */ ++ rsize < NETLINK_MAX_RECEIVE_BUFSIZE && ++ s == cfg->g_netlink->nl_socket_changes) { ++ /* Try to increase buffer size, only for the ++ * socket used to receive changes */ + rsize *= 2; + if (rsize > NETLINK_MAX_RECEIVE_BUFSIZE) { + rsize = NETLINK_MAX_RECEIVE_BUFSIZE; +@@ -843,7 +857,7 @@ netlink_subscribe_changes(struct lldpd *cfg) + netlink_group_mask(RTNLGRP_IPV4_IFADDR) | + netlink_group_mask(RTNLGRP_IPV6_IFADDR); + +- return netlink_connect(cfg, NETLINK_ROUTE, groups); ++ return netlink_connect(cfg, groups); + } + + /** +@@ -852,7 +866,8 @@ static void + netlink_change_cb(struct lldpd *cfg) + { + if (cfg->g_netlink == NULL) return; +- netlink_recv(cfg, cfg->g_netlink->devices, cfg->g_netlink->addresses); ++ netlink_recv(cfg, cfg->g_netlink->nl_socket_changes, cfg->g_netlink->devices, ++ cfg->g_netlink->addresses); + } + + /** +@@ -897,22 +912,24 @@ netlink_initialize(struct lldpd *cfg) + } + TAILQ_INIT(ifs); + +- if (netlink_send(cfg->g_netlink->nl_socket, RTM_GETADDR, 
AF_UNSPEC, 1) == -1) ++ if (netlink_send(cfg->g_netlink->nl_socket_queries, RTM_GETADDR, AF_UNSPEC, ++ 1) == -1) + goto end; +- netlink_recv(cfg, NULL, ifaddrs); +- if (netlink_send(cfg->g_netlink->nl_socket, RTM_GETLINK, AF_PACKET, 2) == -1) ++ netlink_recv(cfg, cfg->g_netlink->nl_socket_queries, NULL, ifaddrs); ++ if (netlink_send(cfg->g_netlink->nl_socket_queries, RTM_GETLINK, AF_PACKET, ++ 2) == -1) + goto end; +- netlink_recv(cfg, ifs, NULL); ++ netlink_recv(cfg, cfg->g_netlink->nl_socket_queries, ifs, NULL); + #ifdef ENABLE_DOT1 + /* If we have a bridge, search for VLAN-aware bridges */ + TAILQ_FOREACH (iff, ifs, next) { + if (iff->type & IFACE_BRIDGE_T) { + log_debug("netlink", + "interface %s is a bridge, check for VLANs", iff->name); +- if (netlink_send(cfg->g_netlink->nl_socket, RTM_GETLINK, ++ if (netlink_send(cfg->g_netlink->nl_socket_queries, RTM_GETLINK, + AF_BRIDGE, 3) == -1) + goto end; +- netlink_recv(cfg, ifs, NULL); ++ netlink_recv(cfg, cfg->g_netlink->nl_socket_queries, ifs, NULL); + break; + } + } +@@ -920,7 +937,7 @@ netlink_initialize(struct lldpd *cfg) + + /* Listen to any future change */ + cfg->g_iface_cb = netlink_change_cb; +- if (levent_iface_subscribe(cfg, cfg->g_netlink->nl_socket) == -1) { ++ if (levent_iface_subscribe(cfg, cfg->g_netlink->nl_socket_changes) == -1) { + goto end; + } + +@@ -937,7 +954,10 @@ void + netlink_cleanup(struct lldpd *cfg) + { + if (cfg->g_netlink == NULL) return; +- if (cfg->g_netlink->nl_socket != -1) close(cfg->g_netlink->nl_socket); ++ if (cfg->g_netlink->nl_socket_changes != -1) ++ close(cfg->g_netlink->nl_socket_changes); ++ if (cfg->g_netlink->nl_socket_queries != -1) ++ close(cfg->g_netlink->nl_socket_queries); + interfaces_free_devices(cfg->g_netlink->devices); + interfaces_free_addresses(cfg->g_netlink->addresses); diff --git a/src/lldpd/patch/series b/src/lldpd/patch/series index 8aa5ab8d8f3..c92e03c1bae 100644 --- a/src/lldpd/patch/series +++ b/src/lldpd/patch/series @@ -1,2 +1,3 @@ -# This series 
applies on GIT commit 396961a038a38675d46f96eaa7b430b2a1f8701b +# This series applies on GIT commit 7a595f1adfa4ae5302ba7953e14fd69c8579aa16 0001-return-error-when-port-does-not-exist.patch +0002-use-a-different-socket-for-changes-and-queries.patch