On Thu, May 13, 2021 at 7:27 PM Yunsheng Lin <linyunsheng@huawei.com> wrote:
struct qdisc_size_table { @@ -159,8 +160,33 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc) static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) {
if (spin_trylock(&qdisc->seqlock))
goto nolock_empty;
/* If the MISSED flag is set, it means other thread has
* set the MISSED flag before second spin_trylock(), so
* we can return false here to avoid multi cpus doing
* the set_bit() and second spin_trylock() concurrently.
*/
if (test_bit(__QDISC_STATE_MISSED, &qdisc->state))
return false;
/* Set the MISSED flag before the second spin_trylock(),
* if the second spin_trylock() return false, it means
* other cpu holding the lock will do dequeuing for us
* or it will see the MISSED flag set after releasing
* lock and reschedule the net_tx_action() to do the
* dequeuing.
*/
set_bit(__QDISC_STATE_MISSED, &qdisc->state);
/* Retry again in case other CPU may not see the new flag
* after it releases the lock at the end of qdisc_run_end().
*/ if (!spin_trylock(&qdisc->seqlock)) return false;
+nolock_empty: WRITE_ONCE(qdisc->empty, false); } else if (qdisc_is_running(qdisc)) { return false; @@ -176,8 +202,15 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) static inline void qdisc_run_end(struct Qdisc *qdisc) { write_seqcount_end(&qdisc->running);
if (qdisc->flags & TCQ_F_NOLOCK)
if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock);
if (unlikely(test_bit(__QDISC_STATE_MISSED,
&qdisc->state))) {
clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
We have test_and_clear_bit(), which is atomic; a separate test_bit() followed by clear_bit() is not.
__netif_schedule(qdisc);
}
}
}
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 44991ea..795d986 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -640,8 +640,10 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) { struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct sk_buff *skb = NULL;
bool need_retry = true; int band;
+retry: for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) { struct skb_array *q = band2list(priv, band);
@@ -652,6 +654,23 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) } if (likely(skb)) { qdisc_update_stats_at_dequeue(qdisc, skb);
} else if (need_retry &&
test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
/* Delay clearing the STATE_MISSED here to reduce
* the overhead of the second spin_trylock() in
* qdisc_run_begin() and __netif_schedule() calling
* in qdisc_run_end().
*/
clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
Ditto: test_bit() followed by clear_bit() is not atomic here either; test_and_clear_bit() is.
/* Make sure dequeuing happens after clearing
* STATE_MISSED.
*/
smp_mb__after_atomic();
need_retry = false;
goto retry;
Two concurrent pfifo_fast_dequeue() calls could both take the retry path if they test __QDISC_STATE_MISSED at the same time and both see it set. Is this a problem?
Also, any reason why you want pfifo_fast to handle a generic Qdisc flag? IOW, why not handle this logic in, for example, qdisc_restart()?
Thanks.