From: Eric Dumazet edumazet@google.com
stable inclusion from linux-4.19.209 commit eb6eeb056c992d20a4d3b4fb7dd05471ba32fea3
--------------------------------
commit 9efdda4e3abed13f0903b7b6e4d4c2102019440a upstream.
When a qdisc setup including pacing FQ is dismantled and recreated, some TCP packets are sent earlier than instructed by TCP stack.
TCP can be fooled when ACK comes back, because the following operation can return a negative value.
tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
Some paths in TCP stack were not dealing properly with this, this patch addresses four of them.
Fixes: ab408b6dc744 ("tcp: switch tcp and sch_fq to new earliest departure time model") Signed-off-by: Eric Dumazet edumazet@google.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Qiumiao Zhang zhangqiumiao1@huawei.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Conflicts: net/ipv4/tcp_timer.c [yyl: keep implention of tcp_clamp_rto_to_user_timeout() and retransmits_timed_out() as mainline] Reviewed-by: Yue Haibing yuehaibing@huawei.com Acked-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv4/tcp_input.c | 16 ++++++++++------ net/ipv4/tcp_timer.c | 10 ++++++---- 2 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ac6c1c41fbffa..36415c66cd9a9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -581,10 +581,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; u32 delta_us;
- if (!delta) - delta = 1; - delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); - tcp_rcv_rtt_update(tp, delta_us, 0); + if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { + if (!delta) + delta = 1; + delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + tcp_rcv_rtt_update(tp, delta_us, 0); + } } }
@@ -2934,9 +2936,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag, if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED) { u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; - u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- seq_rtt_us = ca_rtt_us = delta_us; + if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { + seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + ca_rtt_us = seq_rtt_us; + } } rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */ if (seq_rtt_us < 0) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index f4e2d01c058d4..d071ed6b8b9a0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -26,15 +26,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); u32 elapsed, start_ts; + s32 remaining;
start_ts = tcp_sk(sk)->retrans_stamp; if (!icsk->icsk_user_timeout) return icsk->icsk_rto; elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts; - if (elapsed >= icsk->icsk_user_timeout) + remaining = icsk->icsk_user_timeout - elapsed; + if (remaining <= 0) return 1; /* user timeout has passed; fire ASAP */ - else - return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed)); + + return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining)); }
/** @@ -203,7 +205,7 @@ static bool retransmits_timed_out(struct sock *sk, timeout = tcp_model_timeout(sk, boundary, rto_base); }
- return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout; + return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0; }
/* A write timeout has occurred. Process the after effects. */