Currently qdisc_lock(q) is taken before enqueuing and dequeuing for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is also taken, which can provide the same protection as qdisc_lock(q).
This patch removes the unnecessary qdisc_lock(q) protection for the lockless qdisc's skb_bad_txq/gso_skb queue.
And dev_reset_queue() takes the qdisc->seqlock for a lockless qdisc besides taking the qdisc_lock(q) when doing the qdisc reset, and some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q) when checking qdisc status. It is unnecessary to take both locks while the fast path only takes one lock, so this patch also changes it to take only qdisc_lock(q) for a locked qdisc, and only qdisc->seqlock for a lockless qdisc.
Since qdisc->seqlock is taken for lockless qdisc when calling qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running to decide if the lockless qdisc is running.
Signed-off-by: Yunsheng Lin linyunsheng@huawei.com --- include/net/sch_generic.h | 2 -- net/sched/sch_generic.c | 72 +++++++++++++---------------------------------- 2 files changed, 19 insertions(+), 55 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2d6eb60..0e497ed 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -139,8 +139,6 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
static inline bool qdisc_is_running(struct Qdisc *qdisc) { - if (qdisc->flags & TCQ_F_NOLOCK) - return spin_is_locked(&qdisc->seqlock); return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; }
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 49eae93..a5f1e3c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -38,7 +38,7 @@ EXPORT_SYMBOL(default_qdisc_ops); /* Main transmission queue. */
/* Modifications to data participating in scheduling must be protected with - * qdisc_lock(qdisc) spinlock. + * qdisc_lock(qdisc) or qdisc->seqlock spinlock. * * The idea is the following: * - enqueue, dequeue are serialized via qdisc root lock @@ -51,14 +51,8 @@ EXPORT_SYMBOL(default_qdisc_ops); static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q) { const struct netdev_queue *txq = q->dev_queue; - spinlock_t *lock = NULL; struct sk_buff *skb;
- if (q->flags & TCQ_F_NOLOCK) { - lock = qdisc_lock(q); - spin_lock(lock); - } - skb = skb_peek(&q->skb_bad_txq); if (skb) { /* check the reason of requeuing without tx lock first */ @@ -77,9 +71,6 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q) } }
- if (lock) - spin_unlock(lock); - return skb; }
@@ -96,13 +87,6 @@ static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q) static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q, struct sk_buff *skb) { - spinlock_t *lock = NULL; - - if (q->flags & TCQ_F_NOLOCK) { - lock = qdisc_lock(q); - spin_lock(lock); - } - __skb_queue_tail(&q->skb_bad_txq, skb);
if (qdisc_is_percpu_stats(q)) { @@ -112,20 +96,10 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q, qdisc_qstats_backlog_inc(q, skb); q->q.qlen++; } - - if (lock) - spin_unlock(lock); }
static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { - spinlock_t *lock = NULL; - - if (q->flags & TCQ_F_NOLOCK) { - lock = qdisc_lock(q); - spin_lock(lock); - } - while (skb) { struct sk_buff *next = skb->next;
@@ -144,8 +118,7 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
skb = next; } - if (lock) - spin_unlock(lock); + __netif_schedule(q); }
@@ -207,24 +180,9 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
*packets = 1; if (unlikely(!skb_queue_empty(&q->gso_skb))) { - spinlock_t *lock = NULL; - - if (q->flags & TCQ_F_NOLOCK) { - lock = qdisc_lock(q); - spin_lock(lock); - }
skb = skb_peek(&q->gso_skb);
- /* skb may be null if another cpu pulls gso_skb off in between - * empty check and lock. - */ - if (!skb) { - if (lock) - spin_unlock(lock); - goto validate; - } - /* skb in gso_skb were already validated */ *validate = false; if (xfrm_offload(skb)) @@ -243,11 +201,10 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, } else { skb = NULL; } - if (lock) - spin_unlock(lock); + goto trace; } -validate: + *validate = true;
if ((q->flags & TCQ_F_ONETXQUEUE) && @@ -1153,13 +1110,15 @@ static void dev_reset_queue(struct net_device *dev,
if (nolock) spin_lock_bh(&qdisc->seqlock); - spin_lock_bh(qdisc_lock(qdisc)); + else + spin_lock_bh(qdisc_lock(qdisc));
qdisc_reset(qdisc);
- spin_unlock_bh(qdisc_lock(qdisc)); if (nolock) spin_unlock_bh(&qdisc->seqlock); + else + spin_unlock_bh(qdisc_lock(qdisc)); }
static bool some_qdisc_is_busy(struct net_device *dev) @@ -1168,20 +1127,27 @@ static bool some_qdisc_is_busy(struct net_device *dev)
for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *dev_queue; - spinlock_t *root_lock; struct Qdisc *q; + bool nolock; int val;
dev_queue = netdev_get_tx_queue(dev, i); q = dev_queue->qdisc_sleeping;
- root_lock = qdisc_lock(q); - spin_lock_bh(root_lock); + nolock = q->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&q->seqlock); + else + spin_lock_bh(qdisc_lock(q));
val = (qdisc_is_running(q) || test_bit(__QDISC_STATE_SCHED, &q->state));
- spin_unlock_bh(root_lock); + if (nolock) + spin_unlock_bh(&q->seqlock); + else + spin_unlock_bh(qdisc_lock(q));
if (val) return true;
From: Yunsheng Lin linyunsheng@huawei.com Date: Mon, 15 Mar 2021 17:30:10 +0800
Currently qdisc_lock(q) is taken before enqueuing and dequeuing for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is also taken, which can provide the same protection as qdisc_lock(q).
This patch removes the unnecessary qdisc_lock(q) protection for the lockless qdisc's skb_bad_txq/gso_skb queue.
And dev_reset_queue() takes the qdisc->seqlock for a lockless qdisc besides taking the qdisc_lock(q) when doing the qdisc reset, and some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q) when checking qdisc status. It is unnecessary to take both locks while the fast path only takes one lock, so this patch also changes it to take only qdisc_lock(q) for a locked qdisc, and only qdisc->seqlock for a lockless qdisc.
Since qdisc->seqlock is taken for lockless qdisc when calling qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running to decide if the lockless qdisc is running.
Signed-off-by: Yunsheng Lin linyunsheng@huawei.com
What about other things protected by this lock, such as statistics and qlen?
This change looks too risky to me.
On 2021/3/16 7:41, David Miller wrote:
From: Yunsheng Lin linyunsheng@huawei.com Date: Mon, 15 Mar 2021 17:30:10 +0800
Currently qdisc_lock(q) is taken before enqueuing and dequeuing for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is also taken, which can provide the same protection as qdisc_lock(q).
This patch removes the unnecessary qdisc_lock(q) protection for the lockless qdisc's skb_bad_txq/gso_skb queue.
And dev_reset_queue() takes the qdisc->seqlock for a lockless qdisc besides taking the qdisc_lock(q) when doing the qdisc reset, and some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q) when checking qdisc status. It is unnecessary to take both locks while the fast path only takes one lock, so this patch also changes it to take only qdisc_lock(q) for a locked qdisc, and only qdisc->seqlock for a lockless qdisc.
Since qdisc->seqlock is taken for lockless qdisc when calling qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running to decide if the lockless qdisc is running.
Signed-off-by: Yunsheng Lin linyunsheng@huawei.com
What about other things protected by this lock, such as statistics and qlen?
This change looks too risky to me.
Ok, if that is the case, maybe we should just remove qdisc->seqlock and use qdisc_lock(q) for the lockless qdisc too, so that we do not need to worry about the "lockless qdisc's other things protected by qdisc_lock(q)".
At least for the fast path, taking two locks for lockless qdisc hurts performance when handling requeued skb, especially if the lockless qdisc supports TCQ_F_CAN_BYPASS.
.
From: Yunsheng Lin linyunsheng@huawei.com Date: Tue, 16 Mar 2021 10:40:56 +0800
On 2021/3/16 7:41, David Miller wrote:
From: Yunsheng Lin linyunsheng@huawei.com
At least for the fast path, taking two locks for lockless qdisc hurts performance when handling requeued skb, especially if the lockless qdisc supports TCQ_F_CAN_BYPASS.
The bad txq and gso skb cases are not "fast path", sorry
On 2021/3/17 5:45, David Miller wrote:
From: Yunsheng Lin linyunsheng@huawei.com Date: Tue, 16 Mar 2021 10:40:56 +0800
On 2021/3/16 7:41, David Miller wrote:
From: Yunsheng Lin linyunsheng@huawei.com
At least for the fast path, taking two locks for lockless qdisc hurts performance when handling requeued skb, especially if the lockless qdisc supports TCQ_F_CAN_BYPASS.
The bad txq and gso skb cases are not "fast path", sorry
You are right, it is more of an exceptional data path, but it is still ok to clean that up without obvious risk, right? Also, I am going to replace qdisc->seqlock with qdisc_lock(q) for the lockless qdisc too and remove qdisc->seqlock, which makes more sense.
.
On Mon, Mar 15, 2021 at 4:42 PM David Miller davem@redhat.com wrote:
From: Yunsheng Lin linyunsheng@huawei.com Date: Mon, 15 Mar 2021 17:30:10 +0800
Currently qdisc_lock(q) is taken before enqueuing and dequeuing for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is also taken, which can provide the same protection as qdisc_lock(q).
This patch removes the unnecessary qdisc_lock(q) protection for the lockless qdisc's skb_bad_txq/gso_skb queue.
And dev_reset_queue() takes the qdisc->seqlock for a lockless qdisc besides taking the qdisc_lock(q) when doing the qdisc reset, and some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q) when checking qdisc status. It is unnecessary to take both locks while the fast path only takes one lock, so this patch also changes it to take only qdisc_lock(q) for a locked qdisc, and only qdisc->seqlock for a lockless qdisc.
Since qdisc->seqlock is taken for lockless qdisc when calling qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running to decide if the lockless qdisc is running.
Signed-off-by: Yunsheng Lin linyunsheng@huawei.com
What about other things protected by this lock, such as statistics and qlen?
This change looks too risky to me.
They are per-cpu for pfifo_fast which sets TCQ_F_CPUSTATS too.
Thanks.
On 2021/3/17 2:43, Cong Wang wrote:
On Mon, Mar 15, 2021 at 4:42 PM David Miller davem@redhat.com wrote:
From: Yunsheng Lin linyunsheng@huawei.com Date: Mon, 15 Mar 2021 17:30:10 +0800
Currently qdisc_lock(q) is taken before enqueuing and dequeuing for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is also taken, which can provide the same protection as qdisc_lock(q).
This patch removes the unnecessary qdisc_lock(q) protection for the lockless qdisc's skb_bad_txq/gso_skb queue.
And dev_reset_queue() takes the qdisc->seqlock for a lockless qdisc besides taking the qdisc_lock(q) when doing the qdisc reset, and some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q) when checking qdisc status. It is unnecessary to take both locks while the fast path only takes one lock, so this patch also changes it to take only qdisc_lock(q) for a locked qdisc, and only qdisc->seqlock for a lockless qdisc.
Since qdisc->seqlock is taken for lockless qdisc when calling qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running to decide if the lockless qdisc is running.
Signed-off-by: Yunsheng Lin linyunsheng@huawei.com
What about other things protected by this lock, such as statistics and qlen?
This change looks too risky to me.
They are per-cpu for pfifo_fast which sets TCQ_F_CPUSTATS too.
Did you mean qdisc_lock(q) is protecting the per-cpu stats for pfifo_fast too?
Thanks.
.
On Mon, Mar 15, 2021 at 2:29 AM Yunsheng Lin linyunsheng@huawei.com wrote:
Currently qdisc_lock(q) is taken before enqueuing and dequeuing for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is also taken, which can provide the same protection as qdisc_lock(q).
This patch removes the unnecessary qdisc_lock(q) protection for the lockless qdisc's skb_bad_txq/gso_skb queue.
And dev_reset_queue() takes the qdisc->seqlock for a lockless qdisc besides taking the qdisc_lock(q) when doing the qdisc reset, and some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q) when checking qdisc status. It is unnecessary to take both locks while the fast path only takes one lock, so this patch also changes it to take only qdisc_lock(q) for a locked qdisc, and only qdisc->seqlock for a lockless qdisc.
Since qdisc->seqlock is taken for lockless qdisc when calling qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running to decide if the lockless qdisc is running.
What's the benefit here? Since qdisc->q.lock is also per-qdisc, so there is no actual contention to take it when we already acquire q->seqlock, right?
Also, is ->seqlock supposed to be used for protecting skb_bad_txq etc.? From my understanding, it was introduced merely for replacing __QDISC_STATE_RUNNING. If you want to extend it, you probably have to rename it too.
Thanks.
On 2021/3/17 2:41, Cong Wang wrote:
On Mon, Mar 15, 2021 at 2:29 AM Yunsheng Lin linyunsheng@huawei.com wrote:
Currently qdisc_lock(q) is taken before enqueuing and dequeuing for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is also taken, which can provide the same protection as qdisc_lock(q).
This patch removes the unnecessary qdisc_lock(q) protection for the lockless qdisc's skb_bad_txq/gso_skb queue.
And dev_reset_queue() takes the qdisc->seqlock for a lockless qdisc besides taking the qdisc_lock(q) when doing the qdisc reset, and some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q) when checking qdisc status. It is unnecessary to take both locks while the fast path only takes one lock, so this patch also changes it to take only qdisc_lock(q) for a locked qdisc, and only qdisc->seqlock for a lockless qdisc.
Since qdisc->seqlock is taken for lockless qdisc when calling qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running to decide if the lockless qdisc is running.
What's the benefit here? Since qdisc->q.lock is also per-qdisc, so there is no actual contention to take it when we already acquire q->seqlock, right?
Yes, there is no actual contention to take qdisc->q.lock while q->seqlock is acquired, but a cleanup or minor optimization.
Also, is ->seqlock supposed to be used for protecting skb_bad_txq etc.? From my understanding, it was introduced merely for replacing __QDISC_STATE_RUNNING. If you want to extend it, you probably have to rename it too.
How about just using qdisc->q.lock for the lockless qdisc too and removing qdisc->seqlock completely?
Thanks.
.