The patchset for CVE-2023-52340.
Jon Maxwell (2): ipv6: remove max_size check inline with ipv4 ipv6: Document that max_size sysctl is deprecated
Ziyang Xuan (1): ipv6: fix kabi change in struct dst_ops
Documentation/networking/ip-sysctl.rst | 3 +++ include/net/dst_ops.h | 2 +- net/core/dst.c | 8 ++------ net/ipv6/route.c | 13 +++++-------- 4 files changed, 11 insertions(+), 15 deletions(-)
From: Jon Maxwell jmaxwell37@gmail.com
stable inclusion from stable-v5.10.208 commit dd56c5790dc3484f3c89fd4e21735c796a82b40d category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8ZGOZ CVE: CVE-2023-52340
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit af6d10345ca76670c1b7c37799f0d5576ccef277 upstream.
In ip6_dst_gc() replace:
if (entries > gc_thresh)
With:
if (entries > ops->gc_thresh)
Sending Ipv6 packets in a loop via a raw socket triggers an issue where a route is cloned by ip6_rt_cache_alloc() for each packet sent. This quickly consumes the Ipv6 max_size threshold which defaults to 4096 resulting in these warnings:
[1] 99.187805] dst_alloc: 7728 callbacks suppressed [2] Route cache is full: consider increasing sysctl net.ipv6.route.max_size. . . [300] Route cache is full: consider increasing sysctl net.ipv6.route.max_size.
When this happens the packet is dropped and sendto() gets a network is unreachable error:
remaining pkt 200557 errno 101 remaining pkt 196462 errno 101 . . remaining pkt 126821 errno 101
Implement David Aherns suggestion to remove max_size check seeing that Ipv6 has a GC to manage memory usage. Ipv4 already does not check max_size.
Here are some memory comparisons for Ipv4 vs Ipv6 with the patch:
Test by running 5 instances of a program that sends UDP packets to a raw socket 5000000 times. Compare Ipv4 and Ipv6 performance with a similar program.
Ipv4:
Before test:
MemFree: 29427108 kB Slab: 237612 kB
ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 2881 3990 192 42 2 : tunables 0 0 0
During test:
MemFree: 29417608 kB Slab: 247712 kB
ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 44394 44394 192 42 2 : tunables 0 0 0
After test:
MemFree: 29422308 kB Slab: 238104 kB
ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0
Ipv6 with patch:
Errno 101 errors are not observed anymore with the patch.
Before test:
MemFree: 29422308 kB Slab: 238104 kB
ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0
During Test:
MemFree: 29431516 kB Slab: 240940 kB
ip6_dst_cache 11980 12064 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0
After Test:
MemFree: 29441816 kB Slab: 238132 kB
ip6_dst_cache 1902 2432 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0
Tested-by: Andrea Mayer andrea.mayer@uniroma2.it Signed-off-by: Jon Maxwell jmaxwell37@gmail.com Reviewed-by: David Ahern dsahern@kernel.org Link: https://lore.kernel.org/r/20230112012532.311021-1-jmaxwell37@gmail.com Signed-off-by: Jakub Kicinski kuba@kernel.org Cc: "Jitindar Singh, Suraj" surajjs@amazon.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Ziyang Xuan william.xuanziyang@huawei.com --- include/net/dst_ops.h | 2 +- net/core/dst.c | 8 ++------ net/ipv6/route.c | 13 +++++-------- 3 files changed, 8 insertions(+), 15 deletions(-)
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 250e23188858..85ac66203c21 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -17,7 +17,7 @@ struct dst_ops { unsigned short family; unsigned int gc_thresh;
- int (*gc)(struct dst_ops *ops); + void (*gc)(struct dst_ops *ops); struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); unsigned int (*default_advmss)(const struct dst_entry *); unsigned int (*mtu)(const struct dst_entry *); diff --git a/net/core/dst.c b/net/core/dst.c index fb3bcba87744..453ec8aafc4a 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -83,12 +83,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
if (ops->gc && !(flags & DST_NOCOUNT) && - dst_entries_get_fast(ops) > ops->gc_thresh) { - if (ops->gc(ops)) { - pr_notice_ratelimited("Route cache is full: consider increasing sysctl net.ipv6.route.max_size.\n"); - return NULL; - } - } + dst_entries_get_fast(ops) > ops->gc_thresh) + ops->gc(ops);
dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 73a0a3908824..9603e009cd23 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -89,7 +89,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); -static int ip6_dst_gc(struct dst_ops *ops); +static void ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -3179,11 +3179,10 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, return dst; }
-static int ip6_dst_gc(struct dst_ops *ops) +static void ip6_dst_gc(struct dst_ops *ops) { struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; - int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; @@ -3191,11 +3190,10 @@ static int ip6_dst_gc(struct dst_ops *ops) int entries;
entries = dst_entries_get_fast(ops); - if (entries > rt_max_size) + if (entries > ops->gc_thresh) entries = dst_entries_get_slow(ops);
- if (time_after(rt_last_gc + rt_min_interval, jiffies) && - entries <= rt_max_size) + if (time_after(rt_last_gc + rt_min_interval, jiffies)) goto out;
fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true); @@ -3205,7 +3203,6 @@ static int ip6_dst_gc(struct dst_ops *ops) out: val = atomic_read(&net->ipv6.ip6_rt_gc_expire); atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity)); - return entries > rt_max_size; }
static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg, @@ -6358,7 +6355,7 @@ static int __net_init ip6_route_net_init(struct net *net) #endif
net->ipv6.sysctl.flush_delay = 0; - net->ipv6.sysctl.ip6_rt_max_size = 4096; + net->ipv6.sysctl.ip6_rt_max_size = INT_MAX; net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
From: Jon Maxwell jmaxwell37@gmail.com
mainline inclusion from mainline-v6.3-rc1 commit 695a376b59f72c249caad40da64d5cce239c5f48 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8ZGOZ CVE: CVE-2023-52340
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
v4: fix deprecated typo.
Document that max_size is deprecated due to:
commit af6d10345ca7 ("ipv6: remove max_size check inline with ipv4")
Signed-off-by: Jon Maxwell jmaxwell37@gmail.com Link: https://lore.kernel.org/r/20230120232331.1273881-1-jmaxwell37@gmail.com Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Ziyang Xuan william.xuanziyang@huawei.com --- Documentation/networking/ip-sysctl.rst | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index b27b8873415a..a1f5d9efe9b4 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -125,6 +125,9 @@ route/max_size - INTEGER From linux kernel 3.6 onwards, this is deprecated for ipv4 as route cache is no longer used.
+ From linux kernel 6.3 onwards, this is deprecated for ipv6 + as garbage collection manages cached route entries. + neigh/default/gc_thresh1 - INTEGER Minimum number of entries to keep. Garbage collector will not purge entries if there are fewer than this number.
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8ZGOZ CVE: CVE-2023-52340
--------------------------------
Use KABI_REPLACE to fix kabi change in struct dst_ops.
Signed-off-by: Ziyang Xuan william.xuanziyang@huawei.com --- include/net/dst_ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 85ac66203c21..f3f5d02f2e73 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -17,7 +17,7 @@ struct dst_ops { unsigned short family; unsigned int gc_thresh;
- void (*gc)(struct dst_ops *ops); + KABI_REPLACE(int (*gc)(struct dst_ops *ops), void (*gc)(struct dst_ops *ops)) struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); unsigned int (*default_advmss)(const struct dst_entry *); unsigned int (*mtu)(const struct dst_entry *);
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/4428 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/O...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/4428 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/O...