From: Jon Maxwell jmaxwell37@gmail.com
stable inclusion from stable-v4.19.305 commit 95372b040ae689293c6863b90049f1af68410c8b category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8ZGOZ?from=project-issue CVE: CVE-2023-52340
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit af6d10345ca76670c1b7c37799f0d5576ccef277 upstream.
In ip6_dst_gc() replace:
if (entries > gc_thresh)
With:
if (entries > ops->gc_thresh)
Sending Ipv6 packets in a loop via a raw socket triggers an issue where a route is cloned by ip6_rt_cache_alloc() for each packet sent. This quickly consumes the Ipv6 max_size threshold which defaults to 4096 resulting in these warnings:
[1] 99.187805] dst_alloc: 7728 callbacks suppressed [2] Route cache is full: consider increasing sysctl net.ipv6.route.max_size. . . [300] Route cache is full: consider increasing sysctl net.ipv6.route.max_size.
When this happens the packet is dropped and sendto() gets a network is unreachable error:
remaining pkt 200557 errno 101 remaining pkt 196462 errno 101 . . remaining pkt 126821 errno 101
Implement David Aherns suggestion to remove max_size check seeing that Ipv6 has a GC to manage memory usage. Ipv4 already does not check max_size.
Here are some memory comparisons for Ipv4 vs Ipv6 with the patch:
Test by running 5 instances of a program that sends UDP packets to a raw socket 5000000 times. Compare Ipv4 and Ipv6 performance with a similar program.
Ipv4:
Before test:
MemFree: 29427108 kB Slab: 237612 kB
ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 2881 3990 192 42 2 : tunables 0 0 0
During test:
MemFree: 29417608 kB Slab: 247712 kB
ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 44394 44394 192 42 2 : tunables 0 0 0
After test:
MemFree: 29422308 kB Slab: 238104 kB
ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0
Ipv6 with patch:
Errno 101 errors are not observed anymore with the patch.
Before test:
MemFree: 29422308 kB Slab: 238104 kB
ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0
During Test:
MemFree: 29431516 kB Slab: 240940 kB
ip6_dst_cache 11980 12064 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0
After Test:
MemFree: 29441816 kB Slab: 238132 kB
ip6_dst_cache 1902 2432 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0
Tested-by: Andrea Mayer andrea.mayer@uniroma2.it Signed-off-by: Jon Maxwell jmaxwell37@gmail.com Reviewed-by: David Ahern dsahern@kernel.org Link: https://lore.kernel.org/r/20230112012532.311021-1-jmaxwell37@gmail.com Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Suraj Jitindar Singh surajjs@amazon.com Cc: stable@vger.kernel.org # 4.19.x Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Conflicts: net/ipv6/route.c Signed-off-by: Ziyang Xuan william.xuanziyang@huawei.com --- include/net/dst_ops.h | 2 +- net/core/dst.c | 8 ++------ net/ipv6/route.c | 13 +++++-------- 3 files changed, 8 insertions(+), 15 deletions(-)
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index b8ee48ea07f1..02bf97394f94 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -17,7 +17,7 @@ struct dst_ops { unsigned short family; unsigned int gc_thresh;
- int (*gc)(struct dst_ops *ops); + void (*gc)(struct dst_ops *ops); struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); unsigned int (*default_advmss)(const struct dst_entry *); unsigned int (*mtu)(const struct dst_entry *); diff --git a/net/core/dst.c b/net/core/dst.c index 1a9f84f8cde1..1b1677683b97 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -99,12 +99,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
if (ops->gc && !(flags & DST_NOCOUNT) && - dst_entries_get_fast(ops) > ops->gc_thresh) { - if (ops->gc(ops)) { - pr_notice_ratelimited("Route cache is full: consider increasing sysctl net.ipv6.route.max_size.\n"); - return NULL; - } - } + dst_entries_get_fast(ops) > ops->gc_thresh) + ops->gc(ops);
dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 344e1548fd94..5239514502ad 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -92,7 +92,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); -static int ip6_dst_gc(struct dst_ops *ops); +static void ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -2770,22 +2770,20 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, return dst; }
-static int ip6_dst_gc(struct dst_ops *ops) +static void ip6_dst_gc(struct dst_ops *ops) { struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; - int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; int entries;
entries = dst_entries_get_fast(ops); - if (entries > rt_max_size) + if (entries > ops->gc_thresh) entries = dst_entries_get_slow(ops);
- if (time_after(rt_last_gc + rt_min_interval, jiffies) && - entries <= rt_max_size) + if (time_after(rt_last_gc + rt_min_interval, jiffies)) goto out;
net->ipv6.ip6_rt_gc_expire++; @@ -2795,7 +2793,6 @@ static int ip6_dst_gc(struct dst_ops *ops) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; out: net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; - return entries > rt_max_size; }
static int ip6_convert_metrics(struct net *net, struct fib6_info *rt, @@ -5353,7 +5350,7 @@ static int __net_init ip6_route_net_init(struct net *net) #endif
net->ipv6.sysctl.flush_delay = 0; - net->ipv6.sysctl.ip6_rt_max_size = 4096; + net->ipv6.sysctl.ip6_rt_max_size = INT_MAX; net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;