Skip to content

Commit 452e75c

Browse files
Downport the netlink patch to libteam1.26. Increase netlink buffers (#2822)
1 parent 6bf97d8 commit 452e75c

2 files changed

Lines changed: 304 additions & 1 deletion

File tree

Lines changed: 302 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,302 @@
1+
commit 046fb6ba0aec8246075b18d787daec43201566fa
2+
Author: Antti Tiainen <atiainen@forcepoint.com>
3+
Date: Mon Feb 6 15:41:05 2017 +0200
4+
5+
libteam: resynchronize ifinfo after lost RTNLGRP_LINK notifications
6+
7+
When there's a large number of interfaces (e.g. vlans), teamd loses
8+
link notifications as it cannot read them as fast as kernel is
9+
broadcasting them. This often prevents teamd starting properly if
10+
started concurrently when other links are being set up. It can also
11+
fail when it's up and running, especially in the cases where the team
12+
device itself has a lot of vlans under it.
13+
14+
This can easily be reproduces by simple example (in SMP system) by
15+
manually adding team device with a bunch of vlans, putting it up,
16+
and starting teamd with --take-over option:
17+
18+
root@debian:~# ip link add name team0 type team
19+
root@debian:~# for i in `seq 100 150` ; do
20+
> ip link add link team0 name team0.$i type vlan id $i ; done
21+
root@debian:~# ip link set team0 up
22+
root@debian:~# cat teamd.conf
23+
{
24+
"device": "team0",
25+
"runner": {
26+
"name": "activebackup"
27+
},
28+
"ports": {
29+
"eth1": {},
30+
"eth2": {}
31+
}
32+
}
33+
root@debian:~# teamd -o -N -f teamd.conf
34+
35+
At this point, teamd will not give any error messages or other
36+
indication that something is wrong. But state will not look healthy:
37+
38+
root@debian:~# teamdctl team0 state
39+
setup:
40+
runner: activebackup
41+
ports:
42+
eth1
43+
link watches:
44+
link summary: up
45+
instance[link_watch_0]:
46+
name: ethtool
47+
link: up
48+
down count: 0
49+
Failed to parse JSON port dump.
50+
command call failed (Invalid argument)
51+
52+
If checking state dump, it will show that port eth2 is missing info.
53+
Running strace to teamd will reveal that there's one recvmsgs() that
54+
returned -1 with errno ENOBUFS. What happened in this example was
55+
that when teamd started, all vlans got carrier up, and kernel flooded
56+
notifications faster than teamd could read them. It then lost events
57+
related to port eth2 getting enslaved and up.
58+
59+
The socket that joins RTNLGRP_LINK notifications uses default libnl
60+
32k buffer size. Netlink messages are large (over 1k), and this buffer
61+
gets easily full. Kernel neither knows nor cares were notification
62+
broadcasts delivered. This cannot be fixed by simply increasing the
63+
buffer size, as there's no size that is guaranteed to work in every
64+
use case, and this can require several megabytes of buffer (a way over
65+
normal rmem_max limit) if there are hunderds of vlans.
66+
67+
Only way to recover from this is to refresh all ifinfo list, as it's
68+
invalidated at this point. It cannot easily work around of this by
69+
just refreshing team device and its ports, because library side might
70+
not have ports linked due to events missed, and it doesn't know about
71+
teamd configuration.
72+
73+
Checks now return value of nl_recvmsgs_default() for event socket. In
74+
case of ENOBUFS (which libnl nicely changes to ENOMEM), refreshes
75+
all ifinfo list. get_ifinfo_list() also checks now for removed interfaces
76+
in case of missed dellink event. Currently all TEAM_IFINFO_CHANGE
77+
handlers processed events one by one, so it had to be changed to support
78+
multiple ifinfo changes. For this, ifinfo changed flags are cleared
79+
and removed entries destroyed only after all handlers have been called.
80+
81+
Also, increased nl_cli.sock_event receive buffers to 96k like all other
82+
sockets. Added possibility to change this via environment variable.
83+
84+
Signed-off-by: Antti Tiainen <atiainen@forcepoint.com>
85+
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
86+
87+
diff --git a/libteam/ifinfo.c b/libteam/ifinfo.c
88+
index 72155ae..5c32a9c 100644
89+
--- a/libteam/ifinfo.c
90+
+++ b/libteam/ifinfo.c
91+
@@ -72,6 +72,10 @@ struct team_ifinfo {
92+
#define CHANGED_PHYS_PORT_ID (1 << 5)
93+
#define CHANGED_PHYS_PORT_ID_LEN (1 << 6)
94+
#define CHANGED_ADMIN_STATE (1 << 7)
95+
+/* This is only used when tagging interfaces for finding
96+
+ * removed, and thus not included to CHANGED_ANY.
97+
+ */
98+
+#define CHANGED_REFRESHING (1 << 8)
99+
#define CHANGED_ANY (CHANGED_REMOVED | CHANGED_HWADDR | \
100+
CHANGED_HWADDR_LEN | CHANGED_IFNAME | \
101+
CHANGED_MASTER_IFINDEX | CHANGED_PHYS_PORT_ID | \
102+
@@ -202,7 +206,7 @@ static struct team_ifinfo *ifinfo_find(struct team_handle *th, uint32_t ifindex)
103+
return NULL;
104+
}
105+
106+
-static void clear_last_changed(struct team_handle *th)
107+
+void ifinfo_clear_changed(struct team_handle *th)
108+
{
109+
struct team_ifinfo *ifinfo;
110+
111+
@@ -236,7 +240,7 @@ static void ifinfo_destroy(struct team_ifinfo *ifinfo)
112+
free(ifinfo);
113+
}
114+
115+
-static void ifinfo_destroy_removed(struct team_handle *th)
116+
+void ifinfo_destroy_removed(struct team_handle *th)
117+
{
118+
struct team_ifinfo *ifinfo, *tmp;
119+
120+
@@ -254,8 +258,6 @@ static void obj_input_newlink(struct nl_object *obj, void *arg, bool event)
121+
uint32_t ifindex;
122+
int err;
123+
124+
- ifinfo_destroy_removed(th);
125+
-
126+
link = (struct rtnl_link *) obj;
127+
128+
ifindex = rtnl_link_get_ifindex(link);
129+
@@ -269,7 +271,7 @@ static void obj_input_newlink(struct nl_object *obj, void *arg, bool event)
130+
return;
131+
}
132+
133+
- clear_last_changed(th);
134+
+ clear_changed(ifinfo);
135+
ifinfo_update(ifinfo, link);
136+
137+
if (event)
138+
@@ -292,8 +294,6 @@ static void event_handler_obj_input_dellink(struct nl_object *obj, void *arg)
139+
uint32_t ifindex;
140+
int err;
141+
142+
- ifinfo_destroy_removed(th);
143+
-
144+
link = (struct rtnl_link *) obj;
145+
146+
ifindex = rtnl_link_get_ifindex(link);
147+
@@ -311,7 +311,7 @@ static void event_handler_obj_input_dellink(struct nl_object *obj, void *arg)
148+
return;
149+
}
150+
151+
- clear_last_changed(th);
152+
+ clear_changed(ifinfo);
153+
set_changed(ifinfo, CHANGED_REMOVED);
154+
set_call_change_handlers(th, TEAM_IFINFO_CHANGE);
155+
}
156+
@@ -367,6 +367,14 @@ int get_ifinfo_list(struct team_handle *th)
157+
};
158+
int ret;
159+
int retry = 1;
160+
+ struct team_ifinfo *ifinfo;
161+
+
162+
+ /* Tag all ifinfo, this is cleared in newlink handler.
163+
+ * Any interface that has this after dump is processed
164+
+ * has been removed.
165+
+ */
166+
+ list_for_each_node_entry(ifinfo, &th->ifinfo_list, list)
167+
+ set_changed(ifinfo, CHANGED_REFRESHING);
168+
169+
while (retry) {
170+
retry = 0;
171+
@@ -395,6 +403,15 @@ int get_ifinfo_list(struct team_handle *th)
172+
retry = 1;
173+
}
174+
}
175+
+
176+
+ list_for_each_node_entry(ifinfo, &th->ifinfo_list, list) {
177+
+ if (is_changed(ifinfo, CHANGED_REFRESHING)) {
178+
+ clear_changed(ifinfo);
179+
+ set_changed(ifinfo, CHANGED_REMOVED);
180+
+ set_call_change_handlers(th, TEAM_IFINFO_CHANGE);
181+
+ }
182+
+ }
183+
+
184+
ret = check_call_change_handlers(th, TEAM_IFINFO_CHANGE);
185+
if (ret < 0)
186+
err(th, "get_ifinfo_list: check_call_change_handers failed");
187+
diff --git a/libteam/libteam.c b/libteam/libteam.c
188+
index ac187aa..d5f22cd 100644
189+
--- a/libteam/libteam.c
190+
+++ b/libteam/libteam.c
191+
@@ -236,6 +236,10 @@ int check_call_change_handlers(struct team_handle *th,
192+
break;
193+
}
194+
}
195+
+ if (call_type_mask & TEAM_IFINFO_CHANGE) {
196+
+ ifinfo_destroy_removed(th);
197+
+ ifinfo_clear_changed(th);
198+
+ }
199+
th->change_handler.pending_type_mask &= ~call_type_mask;
200+
return err;
201+
}
202+
@@ -546,6 +550,11 @@ int team_destroy(struct team_handle *th)
203+
#endif
204+
/* \endcond */
205+
206+
+/* libnl uses default 32k socket receive buffer size,
207+
+ * whicn can get too small. Use 96k for all sockets.
208+
+ */
209+
+#define NETLINK_RCVBUF 983040
210+
+
211+
/**
212+
* @param th libteam library context
213+
* @param ifindex team device interface index
214+
@@ -561,6 +570,8 @@ int team_init(struct team_handle *th, uint32_t ifindex)
215+
int err;
216+
int grp_id;
217+
int val;
218+
+ int eventbufsize;
219+
+ const char *env;
220+
221+
if (!ifindex) {
222+
err(th, "Passed interface index %d is not valid.", ifindex);
223+
@@ -589,12 +600,12 @@ int team_init(struct team_handle *th, uint32_t ifindex)
224+
return -errno;
225+
}
226+
227+
- err = nl_socket_set_buffer_size(th->nl_sock, 98304, 0);
228+
+ err = nl_socket_set_buffer_size(th->nl_sock, NETLINK_RCVBUF, 0);
229+
if (err) {
230+
err(th, "Failed to set buffer size of netlink sock.");
231+
return -nl2syserr(err);
232+
}
233+
- err = nl_socket_set_buffer_size(th->nl_sock_event, 98304, 0);
234+
+ err = nl_socket_set_buffer_size(th->nl_sock_event, NETLINK_RCVBUF, 0);
235+
if (err) {
236+
err(th, "Failed to set buffer size of netlink event sock.");
237+
return -nl2syserr(err);
238+
@@ -627,6 +638,25 @@ int team_init(struct team_handle *th, uint32_t ifindex)
239+
nl_socket_modify_cb(th->nl_cli.sock_event, NL_CB_VALID,
240+
NL_CB_CUSTOM, cli_event_handler, th);
241+
nl_cli_connect(th->nl_cli.sock_event, NETLINK_ROUTE);
242+
+
243+
+ env = getenv("TEAM_EVENT_BUFSIZE");
244+
+ if (env) {
245+
+ eventbufsize = strtol(env, NULL, 10);
246+
+ /* ignore other errors, libnl forces minimum 32k and
247+
+ * too large values are truncated to system rmem_max
248+
+ */
249+
+ if (eventbufsize < 0)
250+
+ eventbufsize = 0;
251+
+ } else {
252+
+ eventbufsize = NETLINK_RCVBUF;
253+
+ }
254+
+
255+
+ err = nl_socket_set_buffer_size(th->nl_cli.sock_event, eventbufsize, 0);
256+
+ if (err) {
257+
+ err(th, "Failed to set cli event socket buffer size.");
258+
+ return err;
259+
+ }
260+
+
261+
err = nl_socket_add_membership(th->nl_cli.sock_event, RTNLGRP_LINK);
262+
if (err < 0) {
263+
err(th, "Failed to add netlink membership.");
264+
@@ -767,7 +797,23 @@ static int get_cli_sock_event_fd(struct team_handle *th)
265+
266+
static int cli_sock_event_handler(struct team_handle *th)
267+
{
268+
- nl_recvmsgs_default(th->nl_cli.sock_event);
269+
+ int err;
270+
+
271+
+ err = nl_recvmsgs_default(th->nl_cli.sock_event);
272+
+ err = -nl2syserr(err);
273+
+
274+
+ /* libnl thinks ENOBUFS and ENOMEM are same. Hope it was ENOBUFS. */
275+
+ if (err == -ENOMEM) {
276+
+ warn(th, "Lost link notifications from kernel.");
277+
+ /* There's no way to know what events were lost and no
278+
+ * way to get them again. Refresh all.
279+
+ */
280+
+ err = get_ifinfo_list(th);
281+
+ }
282+
+
283+
+ if (err)
284+
+ return err;
285+
+
286+
return check_call_change_handlers(th, TEAM_IFINFO_CHANGE);
287+
}
288+
289+
diff --git a/libteam/team_private.h b/libteam/team_private.h
290+
index a07632f..a5eb0be 100644
291+
--- a/libteam/team_private.h
292+
+++ b/libteam/team_private.h
293+
@@ -115,6 +115,9 @@ int ifinfo_link_with_port(struct team_handle *th, uint32_t ifindex,
294+
int ifinfo_link(struct team_handle *th, uint32_t ifindex,
295+
struct team_ifinfo **p_ifinfo);
296+
void ifinfo_unlink(struct team_ifinfo *ifinfo);
297+
+void ifinfo_clear_changed(struct team_handle *th);
298+
+void ifinfo_destroy_removed(struct team_handle *th);
299+
+int get_ifinfo_list(struct team_handle *th);
300+
int get_options_handler(struct nl_msg *msg, void *arg);
301+
int option_list_alloc(struct team_handle *th);
302+
int option_list_init(struct team_handle *th);

src/libteam/series

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@
77
0007-Skip-setting-the-same-hwaddr-to-lag-port-to-avoid-di.patch
88
0008-teamd-register-change-handler-for-TEAM_IFINFO_CHANGE.patch
99
0009-teamd-prevent-private-change-handler-reentrance.patch
10-
0010-teamd-lacp-update-port-state-according-to-partners-sy.patch
10+
0010-teamd-lacp-update-port-state-according-to-partners-sy.patch
11+
0011-libteam-resynchronize-ifinfo-after-lost-RTNLGRP_LINK-.patch

0 commit comments

Comments
 (0)