On Thu, May 13, 2021 at 7:27 PM Yunsheng Lin <linyunsheng(a)huawei.com> wrote:
> struct qdisc_size_table {
> @@ -159,8 +160,33 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
> static inline bool qdisc_run_begin(struct Qdisc *qdisc)
> {
> if (qdisc->flags & TCQ_F_NOLOCK) {
> + if (spin_trylock(&qdisc->seqlock))
> + goto nolock_empty;
> +
> + /* If the MISSED flag is set, it means other thread has
> + * set the MISSED flag before second spin_trylock(), so
> + * we can return false here to avoid multi cpus doing
> + * the set_bit() and second spin_trylock() concurrently.
> + */
> + if (test_bit(__QDISC_STATE_MISSED, &qdisc->state))
> + return false;
> +
> + /* Set the MISSED flag before the second spin_trylock(),
> + * if the second spin_trylock() return false, it means
> + * other cpu holding the lock will do dequeuing for us
> + * or it will see the MISSED flag set after releasing
> + * lock and reschedule the net_tx_action() to do the
> + * dequeuing.
> + */
> + set_bit(__QDISC_STATE_MISSED, &qdisc->state);
> +
> + /* Retry again in case other CPU may not see the new flag
> + * after it releases the lock at the end of qdisc_run_end().
> + */
> if (!spin_trylock(&qdisc->seqlock))
> return false;
> +
> +nolock_empty:
> WRITE_ONCE(qdisc->empty, false);
> } else if (qdisc_is_running(qdisc)) {
> return false;
> @@ -176,8 +202,15 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
> static inline void qdisc_run_end(struct Qdisc *qdisc)
> {
> write_seqcount_end(&qdisc->running);
> - if (qdisc->flags & TCQ_F_NOLOCK)
> + if (qdisc->flags & TCQ_F_NOLOCK) {
> spin_unlock(&qdisc->seqlock);
> +
> + if (unlikely(test_bit(__QDISC_STATE_MISSED,
> + &qdisc->state))) {
> + clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
We have test_and_clear_bit(), which does the test and the clear as one
atomic operation; a separate test_bit() followed by clear_bit() does not.
> + __netif_schedule(qdisc);
> + }
> + }
> }
>
> static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index 44991ea..795d986 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -640,8 +640,10 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
> {
> struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
> struct sk_buff *skb = NULL;
> + bool need_retry = true;
> int band;
>
> +retry:
> for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
> struct skb_array *q = band2list(priv, band);
>
> @@ -652,6 +654,23 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
> }
> if (likely(skb)) {
> qdisc_update_stats_at_dequeue(qdisc, skb);
> + } else if (need_retry &&
> + test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
> + /* Delay clearing the STATE_MISSED here to reduce
> + * the overhead of the second spin_trylock() in
> + * qdisc_run_begin() and __netif_schedule() calling
> + * in qdisc_run_end().
> + */
> + clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
Ditto: test_and_clear_bit() could be used here as well.
> +
> + /* Make sure dequeuing happens after clearing
> + * STATE_MISSED.
> + */
> + smp_mb__after_atomic();
> +
> + need_retry = false;
> +
> + goto retry;
Two concurrent pfifo_fast_dequeue() calls could both take this retry
path if they test __QDISC_STATE_MISSED at the same time and both see
it set. Is this a problem?
Also, any reason why you want pfifo_fast to handle a generic
Qdisc flag? IOW, why not handle this logic in, for example,
qdisc_restart()?
Thanks.