Skip to content

Commit 1a3b91d

Browse files
ZhaohuiS and FengPan-Frank
authored and committed
Add lldpd patch to fix incomplete interface detection (#25436)
Why I did it: On the 5640 full-topology testbed, 457 interfaces (456 Ethernet + eth0) come up during system boot while lldpd is initializing at the same time. During initialization, lldpd sends an RTM_GETLINK dump request to get all interfaces, but during this period some new interfaces are still coming up, and lldpd subscribes to asynchronous notifications of netlink updates (levent_iface_subscribe). Since queries and changes use the same socket, cfg->g_netlink->nl_socket, the earlier RTM_GETLINK interface dump (netlink_recv RTM_GETLINK) is impacted by the handling of the new RTM_NEWLINK messages (netlink_change_cb). As a result, about 200+ interfaces are missing from the LLDP neighbor table; only the 200+ interfaces whose RTM_NEWLINK messages arrived after lldpd initialization are present. Phenomenon: (1) Incorrect Chassis ID - the Chassis ID is incorrect: it should be the MAC address but shows the hostname instead. a. lldpd fails to find eth0 and falls back to the hostname. WARNING lldp#lldpcli[29]: cannot find port eth0 (2) lldpcli config failure - The port is already up, but later lldp cannot find the port, hence it misses the port-up events and is never able to recover. The symptom is that both sides are missing LLDP entries. a. <11>2026-02-05T04:18:42.052245+00:00 ATL21-0101-0014-12BT0 ERR lldp#lldpmgrd[38]: Command failed '['lldpcli', 'configure', 'ports', 'Ethernet501', 'lldp', 'portidsubtype', 'local', 'etp63f', 'description', 'ATL210101580129:A1.PORT8']': 2026-02-05T04:18:42 [WARN/lldpctl] cannot find port Ethernet501#012 - command was failed 6 times, disabling retry Work item tracking: Microsoft ADO 36610002. How I did it: SONiC currently uses lldpd 1.0.16. There is a known issue in the lldpd community: "In some cases lldpd cannot get all interfaces · Issue #611 · lldpd/lldpd". It has been fixed upstream, but the fix is not part of the 1.0.16 tag: "daemon/netlink: use a different socket for changes and queries · lldpd/lldpd@88fe3fa". This change adds that commit as a new patch for SONiC to fix the issue. How to verify it: "config reload" reproduces this issue easily on the 5640 full-topology testbed.
Try running "config reload" and verify that all LLDP neighbors are up. Signed-off-by: Zhaohui Sun <[email protected]> Signed-off-by: Feng Pan <[email protected]>
1 parent 804b772 commit 1a3b91d

File tree

2 files changed

+193
-1
lines changed

2 files changed

+193
-1
lines changed
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
diff --git a/src/daemon/netlink.c b/src/daemon/netlink.c
2+
index 55f9b66..e7db1fc 100644
3+
--- a/src/daemon/netlink.c
4+
+++ b/src/daemon/netlink.c
5+
@@ -38,7 +38,8 @@ struct netlink_req {
6+
};
7+
8+
struct lldpd_netlink {
9+
- int nl_socket;
10+
+ int nl_socket_queries;
11+
+ int nl_socket_changes;
12+
int nl_socket_recv_size;
13+
/* Cache */
14+
struct interfaces_device_list *devices;
15+
@@ -94,34 +95,35 @@ netlink_socket_set_buffer_size(int s, int optname, const char *optname_str, int
16+
* @return 0 on success, -1 otherwise
17+
*/
18+
static int
19+
-netlink_connect(struct lldpd *cfg, int protocol, unsigned groups)
20+
+netlink_connect(struct lldpd *cfg, unsigned groups)
21+
{
22+
- int s;
23+
+ int s1 = -1, s2 = -1;
24+
struct sockaddr_nl local = { .nl_family = AF_NETLINK,
25+
.nl_pid = 0,
26+
.nl_groups = groups };
27+
28+
- /* Open Netlink socket */
29+
- log_debug("netlink", "opening netlink socket");
30+
- s = socket(AF_NETLINK, SOCK_RAW, protocol);
31+
- if (s == -1) {
32+
- log_warn("netlink", "unable to open netlink socket");
33+
- return -1;
34+
+ /* Open Netlink socket for subscriptions */
35+
+ log_debug("netlink", "opening netlink sockets");
36+
+ s1 = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
37+
+ if (s1 == -1) {
38+
+ log_warn("netlink", "unable to open netlink socket for changes");
39+
+ goto error;
40+
}
41+
if (NETLINK_SEND_BUFSIZE &&
42+
- netlink_socket_set_buffer_size(s, SO_SNDBUF, "SO_SNDBUF",
43+
+ netlink_socket_set_buffer_size(s1, SO_SNDBUF, "SO_SNDBUF",
44+
NETLINK_SEND_BUFSIZE) == -1) {
45+
- close(s);
46+
- return -1;
47+
+ log_warn("netlink", "unable to set send buffer size");
48+
+ goto error;
49+
}
50+
51+
- int rc = netlink_socket_set_buffer_size(s, SO_RCVBUF, "SO_RCVBUF",
52+
+ int rc = netlink_socket_set_buffer_size(s1, SO_RCVBUF, "SO_RCVBUF",
53+
NETLINK_RECEIVE_BUFSIZE);
54+
switch (rc) {
55+
case -1:
56+
- close(s);
57+
- return -1;
58+
+ log_warn("netlink", "unable to set receiver buffer size");
59+
+ goto error;
60+
case -2:
61+
+ /* Cannot set size */
62+
cfg->g_netlink->nl_socket_recv_size = 0;
63+
break;
64+
default:
65+
@@ -129,13 +131,24 @@ netlink_connect(struct lldpd *cfg, int protocol, unsigned groups)
66+
break;
67+
}
68+
if (groups &&
69+
- bind(s, (struct sockaddr *)&local, sizeof(struct sockaddr_nl)) < 0) {
70+
+ bind(s1, (struct sockaddr *)&local, sizeof(struct sockaddr_nl)) < 0) {
71+
log_warn("netlink", "unable to bind netlink socket");
72+
- close(s);
73+
- return -1;
74+
+ goto error;
75+
}
76+
- cfg->g_netlink->nl_socket = s;
77+
+
78+
+ /* Opening Netlink socket to for queries */
79+
+ s2 = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
80+
+ if (s2 == -1) {
81+
+ log_warn("netlink", "unable to open netlink socket for queries");
82+
+ goto error;
83+
+ }
84+
+ cfg->g_netlink->nl_socket_changes = s1;
85+
+ cfg->g_netlink->nl_socket_queries = s2;
86+
return 0;
87+
+error:
88+
+ if (s1 != -1) close(s1);
89+
+ if (s2 != -1) close(s2);
90+
+ return -1;
91+
}
92+
93+
/**
94+
@@ -525,13 +538,12 @@ netlink_merge(struct interfaces_device *old, struct interfaces_device *new)
95+
* @return 0 on success, -1 on error
96+
*/
97+
static int
98+
-netlink_recv(struct lldpd *cfg, struct interfaces_device_list *ifs,
99+
+netlink_recv(struct lldpd *cfg, int s, struct interfaces_device_list *ifs,
100+
struct interfaces_address_list *ifas)
101+
{
102+
int end = 0, ret = 0, flags, retry = 0;
103+
struct iovec iov;
104+
int link_update = 0;
105+
- int s = cfg->g_netlink->nl_socket;
106+
107+
struct interfaces_device *ifdold;
108+
struct interfaces_device *ifdnew;
109+
@@ -570,8 +582,10 @@ netlink_recv(struct lldpd *cfg, struct interfaces_device_list *ifs,
110+
}
111+
int rsize = cfg->g_netlink->nl_socket_recv_size;
112+
if (errno == ENOBUFS && rsize > 0 &&
113+
- rsize < NETLINK_MAX_RECEIVE_BUFSIZE) {
114+
- /* Try to increase buffer size */
115+
+ rsize < NETLINK_MAX_RECEIVE_BUFSIZE &&
116+
+ s == cfg->g_netlink->nl_socket_changes) {
117+
+ /* Try to increase buffer size, only for the
118+
+ * socket used to receive changes */
119+
rsize *= 2;
120+
if (rsize > NETLINK_MAX_RECEIVE_BUFSIZE) {
121+
rsize = NETLINK_MAX_RECEIVE_BUFSIZE;
122+
@@ -843,7 +857,7 @@ netlink_subscribe_changes(struct lldpd *cfg)
123+
netlink_group_mask(RTNLGRP_IPV4_IFADDR) |
124+
netlink_group_mask(RTNLGRP_IPV6_IFADDR);
125+
126+
- return netlink_connect(cfg, NETLINK_ROUTE, groups);
127+
+ return netlink_connect(cfg, groups);
128+
}
129+
130+
/**
131+
@@ -852,7 +866,8 @@ static void
132+
netlink_change_cb(struct lldpd *cfg)
133+
{
134+
if (cfg->g_netlink == NULL) return;
135+
- netlink_recv(cfg, cfg->g_netlink->devices, cfg->g_netlink->addresses);
136+
+ netlink_recv(cfg, cfg->g_netlink->nl_socket_changes, cfg->g_netlink->devices,
137+
+ cfg->g_netlink->addresses);
138+
}
139+
140+
/**
141+
@@ -897,22 +912,24 @@ netlink_initialize(struct lldpd *cfg)
142+
}
143+
TAILQ_INIT(ifs);
144+
145+
- if (netlink_send(cfg->g_netlink->nl_socket, RTM_GETADDR, AF_UNSPEC, 1) == -1)
146+
+ if (netlink_send(cfg->g_netlink->nl_socket_queries, RTM_GETADDR, AF_UNSPEC,
147+
+ 1) == -1)
148+
goto end;
149+
- netlink_recv(cfg, NULL, ifaddrs);
150+
- if (netlink_send(cfg->g_netlink->nl_socket, RTM_GETLINK, AF_PACKET, 2) == -1)
151+
+ netlink_recv(cfg, cfg->g_netlink->nl_socket_queries, NULL, ifaddrs);
152+
+ if (netlink_send(cfg->g_netlink->nl_socket_queries, RTM_GETLINK, AF_PACKET,
153+
+ 2) == -1)
154+
goto end;
155+
- netlink_recv(cfg, ifs, NULL);
156+
+ netlink_recv(cfg, cfg->g_netlink->nl_socket_queries, ifs, NULL);
157+
#ifdef ENABLE_DOT1
158+
/* If we have a bridge, search for VLAN-aware bridges */
159+
TAILQ_FOREACH (iff, ifs, next) {
160+
if (iff->type & IFACE_BRIDGE_T) {
161+
log_debug("netlink",
162+
"interface %s is a bridge, check for VLANs", iff->name);
163+
- if (netlink_send(cfg->g_netlink->nl_socket, RTM_GETLINK,
164+
+ if (netlink_send(cfg->g_netlink->nl_socket_queries, RTM_GETLINK,
165+
AF_BRIDGE, 3) == -1)
166+
goto end;
167+
- netlink_recv(cfg, ifs, NULL);
168+
+ netlink_recv(cfg, cfg->g_netlink->nl_socket_queries, ifs, NULL);
169+
break;
170+
}
171+
}
172+
@@ -920,7 +937,7 @@ netlink_initialize(struct lldpd *cfg)
173+
174+
/* Listen to any future change */
175+
cfg->g_iface_cb = netlink_change_cb;
176+
- if (levent_iface_subscribe(cfg, cfg->g_netlink->nl_socket) == -1) {
177+
+ if (levent_iface_subscribe(cfg, cfg->g_netlink->nl_socket_changes) == -1) {
178+
goto end;
179+
}
180+
181+
@@ -937,7 +954,10 @@ void
182+
netlink_cleanup(struct lldpd *cfg)
183+
{
184+
if (cfg->g_netlink == NULL) return;
185+
- if (cfg->g_netlink->nl_socket != -1) close(cfg->g_netlink->nl_socket);
186+
+ if (cfg->g_netlink->nl_socket_changes != -1)
187+
+ close(cfg->g_netlink->nl_socket_changes);
188+
+ if (cfg->g_netlink->nl_socket_queries != -1)
189+
+ close(cfg->g_netlink->nl_socket_queries);
190+
interfaces_free_devices(cfg->g_netlink->devices);
191+
interfaces_free_addresses(cfg->g_netlink->addresses);

src/lldpd/patch/series

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1-
# This series applies on GIT commit 396961a038a38675d46f96eaa7b430b2a1f8701b
1+
# This series applies on GIT commit 7a595f1adfa4ae5302ba7953e14fd69c8579aa16
22
0001-return-error-when-port-does-not-exist.patch
3+
0002-use-a-different-socket-for-changes-and-queries.patch

0 commit comments

Comments
 (0)