2323#include <linux/vmalloc.h>
2424#include <linux/rtnetlink.h>
2525#include <linux/reciprocal_div.h>
26+ #include <linux/rbtree.h>
2627
2728#include <net/netlink.h>
2829#include <net/pkt_sched.h>
6869*/
6970
7071struct netem_sched_data {
71- /* internal t(ime)fifo qdisc uses sch->q and sch->limit */
72+ /* internal t(ime)fifo qdisc uses t_root and sch->limit */
73+ struct rb_root t_root ;
7274
7375 /* optional qdisc for classful handling (NULL at netem init) */
7476 struct Qdisc * qdisc ;
@@ -128,10 +130,35 @@ struct netem_sched_data {
128130 */
/* netem's private per-packet control block, stored in the generic
 * qdisc cb area of the skb (validated in netem_skb_cb() below).
 */
struct netem_skb_cb {
	/* Absolute scheduled delivery time; used as the sort key for the
	 * t_root rb_tree and compared against psched_get_time() at dequeue.
	 */
	psched_time_t	time_to_send;
	/* Copy of skb->tstamp taken at enqueue, because skb->tstamp is
	 * clobbered while the skb's next/prev/tstamp fields are overloaded
	 * as rb_node storage; restored at dequeue.
	 */
	ktime_t		tstamp_save;
};
132135
136+ /* Because space in skb->cb[] is tight, netem overloads skb->next/prev/tstamp
137+ * to hold a rb_node structure.
138+ *
139+ * If struct sk_buff layout is changed, the following checks will complain.
140+ */
/* Return the rb_node embedded (by overloading) in an skb.
 *
 * There is no room in skb->cb[] for an rb_node, so netem reuses the
 * adjacent skb->next/skb->prev/skb->tstamp fields as its storage.
 * The BUILD_BUG_ON()s are compile-time proof that those three fields
 * are contiguous, start at offset 0, and are large enough to hold a
 * struct rb_node; if the sk_buff layout ever changes, the build breaks
 * here instead of corrupting packets at runtime.
 */
static struct rb_node *netem_rb_node(struct sk_buff *skb)
{
	/* next must be the very first field ... */
	BUILD_BUG_ON(offsetof(struct sk_buff, next) != 0);
	/* ... immediately followed by prev ... */
	BUILD_BUG_ON(offsetof(struct sk_buff, prev) !=
		     offsetof(struct sk_buff, next) + sizeof(skb->next));
	/* ... immediately followed by tstamp ... */
	BUILD_BUG_ON(offsetof(struct sk_buff, tstamp) !=
		     offsetof(struct sk_buff, prev) + sizeof(skb->prev));
	/* ... and together they must be big enough for an rb_node. */
	BUILD_BUG_ON(sizeof(struct rb_node) > sizeof(skb->next) +
					      sizeof(skb->prev) +
					      sizeof(skb->tstamp));
	return (struct rb_node *)&skb->next;
}
153+
/* Inverse of netem_rb_node(): recover the skb from its embedded rb_node.
 *
 * netem_rb_node() hands out &skb->next, and BUILD_BUG_ON there proves
 * that next sits at offset 0 of struct sk_buff, so converting back is
 * a plain pointer cast.
 */
static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
{
	struct sk_buff *skb = (struct sk_buff *)rb;

	return skb;
}
158+
133159static inline struct netem_skb_cb * netem_skb_cb (struct sk_buff * skb )
134160{
161+ /* we assume we can use skb next/prev/tstamp as storage for rb_node */
135162 qdisc_cb_private_validate (skb , sizeof (struct netem_skb_cb ));
136163 return (struct netem_skb_cb * )qdisc_skb_cb (skb )-> data ;
137164}
@@ -333,20 +360,23 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
333360
334361static void tfifo_enqueue (struct sk_buff * nskb , struct Qdisc * sch )
335362{
336- struct sk_buff_head * list = & sch -> q ;
363+ struct netem_sched_data * q = qdisc_priv ( sch ) ;
337364 psched_time_t tnext = netem_skb_cb (nskb )-> time_to_send ;
338- struct sk_buff * skb = skb_peek_tail ( list ) ;
365+ struct rb_node * * p = & q -> t_root . rb_node , * parent = NULL ;
339366
340- /* Optimize for add at tail */
341- if (likely (!skb || tnext >= netem_skb_cb (skb )-> time_to_send ))
342- return __skb_queue_tail (list , nskb );
367+ while (* p ) {
368+ struct sk_buff * skb ;
343369
344- skb_queue_reverse_walk (list , skb ) {
370+ parent = * p ;
371+ skb = netem_rb_to_skb (parent );
345372 if (tnext >= netem_skb_cb (skb )-> time_to_send )
346- break ;
373+ p = & parent -> rb_right ;
374+ else
375+ p = & parent -> rb_left ;
347376 }
348-
349- __skb_queue_after (list , skb , nskb );
377+ rb_link_node (netem_rb_node (nskb ), parent , p );
378+ rb_insert_color (netem_rb_node (nskb ), & q -> t_root );
379+ sch -> q .qlen ++ ;
350380}
351381
352382/*
@@ -436,23 +466,28 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
436466 now = psched_get_time ();
437467
438468 if (q -> rate ) {
439- struct sk_buff_head * list = & sch -> q ;
469+ struct sk_buff * last ;
440470
441- if (!skb_queue_empty (list )) {
471+ if (!skb_queue_empty (& sch -> q ))
472+ last = skb_peek_tail (& sch -> q );
473+ else
474+ last = netem_rb_to_skb (rb_last (& q -> t_root ));
475+ if (last ) {
442476 /*
443477 * Last packet in queue is reference point (now),
444478 * calculate this time bonus and subtract
445479 * from delay.
446480 */
447- delay -= netem_skb_cb (skb_peek_tail ( list ) )-> time_to_send - now ;
481+ delay -= netem_skb_cb (last )-> time_to_send - now ;
448482 delay = max_t (psched_tdiff_t , 0 , delay );
449- now = netem_skb_cb (skb_peek_tail ( list ) )-> time_to_send ;
483+ now = netem_skb_cb (last )-> time_to_send ;
450484 }
451485
452486 delay += packet_len_2_sched_time (skb -> len , q );
453487 }
454488
455489 cb -> time_to_send = now + delay ;
490+ cb -> tstamp_save = skb -> tstamp ;
456491 ++ q -> counter ;
457492 tfifo_enqueue (skb , sch );
458493 } else {
@@ -476,6 +511,21 @@ static unsigned int netem_drop(struct Qdisc *sch)
476511 unsigned int len ;
477512
478513 len = qdisc_queue_drop (sch );
514+
515+ if (!len ) {
516+ struct rb_node * p = rb_first (& q -> t_root );
517+
518+ if (p ) {
519+ struct sk_buff * skb = netem_rb_to_skb (p );
520+
521+ rb_erase (p , & q -> t_root );
522+ sch -> q .qlen -- ;
523+ skb -> next = NULL ;
524+ skb -> prev = NULL ;
525+ len = qdisc_pkt_len (skb );
526+ kfree_skb (skb );
527+ }
528+ }
479529 if (!len && q -> qdisc && q -> qdisc -> ops -> drop )
480530 len = q -> qdisc -> ops -> drop (q -> qdisc );
481531 if (len )
@@ -488,19 +538,32 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
488538{
489539 struct netem_sched_data * q = qdisc_priv (sch );
490540 struct sk_buff * skb ;
541+ struct rb_node * p ;
491542
492543 if (qdisc_is_throttled (sch ))
493544 return NULL ;
494545
495546tfifo_dequeue :
496- skb = qdisc_peek_head ( sch );
547+ skb = __skb_dequeue ( & sch -> q );
497548 if (skb ) {
498- const struct netem_skb_cb * cb = netem_skb_cb (skb );
549+ deliver :
550+ sch -> qstats .backlog -= qdisc_pkt_len (skb );
551+ qdisc_unthrottled (sch );
552+ qdisc_bstats_update (sch , skb );
553+ return skb ;
554+ }
555+ p = rb_first (& q -> t_root );
556+ if (p ) {
557+ skb = netem_rb_to_skb (p );
499558
500559 /* if more time remaining? */
501- if (cb -> time_to_send <= psched_get_time ()) {
502- __skb_unlink (skb , & sch -> q );
503- sch -> qstats .backlog -= qdisc_pkt_len (skb );
560+ if (netem_skb_cb (skb )-> time_to_send <= psched_get_time ()) {
561+ rb_erase (p , & q -> t_root );
562+
563+ sch -> q .qlen -- ;
564+ skb -> next = NULL ;
565+ skb -> prev = NULL ;
566+ skb -> tstamp = netem_skb_cb (skb )-> tstamp_save ;
504567
505568#ifdef CONFIG_NET_CLS_ACT
506569 /*
@@ -522,18 +585,16 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
522585 }
523586 goto tfifo_dequeue ;
524587 }
525- deliver :
526- qdisc_unthrottled (sch );
527- qdisc_bstats_update (sch , skb );
528- return skb ;
588+ goto deliver ;
529589 }
530590
531591 if (q -> qdisc ) {
532592 skb = q -> qdisc -> ops -> dequeue (q -> qdisc );
533593 if (skb )
534594 goto deliver ;
535595 }
536- qdisc_watchdog_schedule (& q -> watchdog , cb -> time_to_send );
596+ qdisc_watchdog_schedule (& q -> watchdog ,
597+ netem_skb_cb (skb )-> time_to_send );
537598 }
538599
539600 if (q -> qdisc ) {
0 commit comments