diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 3ed6bcc4be72..177f240d59d4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -37,8 +37,15 @@ enum qdisc_state_t { __QDISC_STATE_SCHED, __QDISC_STATE_DEACTIVATED, __QDISC_STATE_MISSED, + __QDISC_STATE_DRAINING, }; +#define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED) +#define QDISC_STATE_DRAINING BIT(__QDISC_STATE_DRAINING) + +#define QDISC_STATE_NON_EMPTY (QDISC_STATE_MISSED | \ + QDISC_STATE_DRAINING) + struct qdisc_size_table { struct rcu_head rcu; struct list_head list; @@ -145,6 +152,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; } +static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc) +{ + return !(READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY); +} + static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) { return q->flags & TCQ_F_CPUSTATS; @@ -206,10 +218,8 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) spin_unlock(&qdisc->seqlock); if (unlikely(test_bit(__QDISC_STATE_MISSED, - &qdisc->state))) { - clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + &qdisc->state))) __netif_schedule(qdisc); - } } else { write_seqcount_end(&qdisc->running); } diff --git a/net/core/dev.c b/net/core/dev.c index 50531a2d0b20..991d09b67bd9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3852,10 +3852,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; - if (likely(!netif_xmit_frozen_or_stopped(txq))) - qdisc_run(q); + if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) && + qdisc_run_begin(q)) { + /* Retest nolock_qdisc_is_empty() within the protection + * of q->seqlock to protect from racing with requeuing. + */ + if (unlikely(!nolock_qdisc_is_empty(q))) { + rc = q->enqueue(skb, q, &to_free) & + NET_XMIT_MASK; + __qdisc_run(q); + qdisc_run_end(q); + goto no_lock_out; + } + + qdisc_bstats_cpu_update(q, skb); + if (sch_direct_xmit(skb, q, dev, txq, NULL, true) && + !nolock_qdisc_is_empty(q)) + __qdisc_run(q); + + qdisc_run_end(q); + return NET_XMIT_SUCCESS; + } + + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + qdisc_run(q); + +no_lock_out: if (unlikely(to_free)) kfree_skb_list(to_free); return rc; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e9c0afc8becc..9984ccc45946 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -52,6 +52,8 @@ static void qdisc_maybe_clear_missed(struct Qdisc *q, */ if (!netif_xmit_frozen_or_stopped(txq)) set_bit(__QDISC_STATE_MISSED, &q->state); + else + set_bit(__QDISC_STATE_DRAINING, &q->state); } /* Main transmission queue. */ @@ -164,9 +166,13 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) skb = next; } - if (lock) + + if (lock) { spin_unlock(lock); - __netif_schedule(q); + set_bit(__QDISC_STATE_MISSED, &q->state); + } else { + __netif_schedule(q); + } } static void try_bulk_dequeue_skb(struct Qdisc *q, @@ -409,7 +415,11 @@ void __qdisc_run(struct Qdisc *q) while (qdisc_restart(q, &packets)) { quota -= packets; if (quota <= 0) { - __netif_schedule(q); + if (q->flags & TCQ_F_NOLOCK) + set_bit(__QDISC_STATE_MISSED, &q->state); + else + __netif_schedule(q); + break; } } @@ -698,13 +708,14 @@ retry: if (likely(skb)) { qdisc_update_stats_at_dequeue(qdisc, skb); } else if (need_retry && - test_bit(__QDISC_STATE_MISSED, &qdisc->state)) { + READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) { /* Delay clearing the STATE_MISSED here to reduce * the overhead of the second spin_trylock() in * qdisc_run_begin() and __netif_schedule() calling * in qdisc_run_end(). */ clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); /* Make sure dequeuing happens after clearing * STATE_MISSED. @@ -1222,6 +1233,7 @@ static void dev_reset_queue(struct net_device *dev, spin_unlock_bh(qdisc_lock(qdisc)); if (nolock) { clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); spin_unlock_bh(&qdisc->seqlock); } }