From: Willem de Bruijn willemb@google.com
stable inclusion from stable-v5.10.102 commit 4f523f15e5d753ac055302bc29ca9677d6692eed bugzilla: https://gitee.com/openeuler/kernel/issues/I575QT
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 0b0dff5b3b98c5c7ce848151df9da0b3cdf0cc8b upstream.
Ipv6 flowlabels historically require a reservation before use. Optionally in exclusive mode (e.g., user-private).
Commit 59c820b2317f ("ipv6: elide flowlabel check if no exclusive leases exist") introduced a fastpath that avoids this check when no exclusive leases exist in the system, and thus any flowlabel use will be granted.
That allows skipping the control operation to reserve a flowlabel entirely. Though with a warning if the fast path fails:
This is an optimization. Robust applications still have to revert to requesting leases if the fast path fails due to an exclusive lease.
Still, this is subtle. Better isolate network namespaces from each other. Flowlabels are per-netns. Also record per-netns whether exclusive leases are in use. Then behavior does not change based on activity in other netns.
Changes v2 - wrap in IS_ENABLED(CONFIG_IPV6) to avoid breakage if disabled
Fixes: 59c820b2317f ("ipv6: elide flowlabel check if no exclusive leases exist") Link: https://lore.kernel.org/netdev/MWHPR2201MB1072BCCCFCE779E4094837ACD0329@MWHP... Reported-by: Congyu Liu liu3101@purdue.edu Signed-off-by: Willem de Bruijn willemb@google.com Tested-by: Congyu Liu liu3101@purdue.edu Link: https://lore.kernel.org/r/20220215160037.1976072-1-willemdebruijn.kernel@gma... Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Wang Yufen wangyufen@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/net/ipv6.h | 5 ++++- include/net/netns/ipv6.h | 5 +++++ net/ipv6/ip6_flowlabel.c | 4 +++- 3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c0273ae50296..9392a81a3ae4 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -390,17 +390,20 @@ static inline void txopt_put(struct ipv6_txoptions *opt) kfree_rcu(opt, rcu); }
+#if IS_ENABLED(CONFIG_IPV6) struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label);
extern struct static_key_false_deferred ipv6_flowlabel_exclusive; static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) { - if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key)) + if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) && + READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl)) return __fl6_sock_lookup(sk, label) ? : ERR_PTR(-ENOENT);
return NULL; } +#endif
struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index ce5ed87accda..b2a28201f4fd 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -83,6 +83,11 @@ struct netns_ipv6 { unsigned long ip6_rt_last_gc; #ifdef CONFIG_IPV6_MULTIPLE_TABLES unsigned int fib6_rules_require_fldissect; +#endif +#ifndef __GENKSYMS__ + unsigned char flowlabel_has_excl; +#endif +#ifdef CONFIG_IPV6_MULTIPLE_TABLES bool fib6_has_custom_rules; #ifdef CONFIG_IPV6_SUBTREES unsigned int fib6_routes_require_src; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index aa673a6a7e43..ceb85c67ce39 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -450,8 +450,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, err = -EINVAL; goto done; } - if (fl_shared_exclusive(fl) || fl->opt) + if (fl_shared_exclusive(fl) || fl->opt) { + WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1); static_branch_deferred_inc(&ipv6_flowlabel_exclusive); + } return fl;
done:
From: Kunkun Jiang jiangkunkun@huawei.com
virt inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I573P1 CVE: NA
------------------------------
Some parameters in iommu_clear_dirty_log/vfio_iova_dirty_log_clear have uncorrect type, which used in bitmap operations. In some cases, it will cause data overflow.
Fixes: bbf3b39e5576b (iommu: Introduce dirty log tracking framework) Fixes: f1c9b9fa4fbc0 (vfio/iommu_type1: Add support for manual dirty log clear) Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com Reviewed-by: Keqian Zhu zhukeqian1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/iommu.c | 7 +++---- drivers/vfio/vfio_iommu_type1.c | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index e61d16f0ede2..9116c93945d0 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3142,9 +3142,8 @@ int iommu_clear_dirty_log(struct iommu_domain *domain, unsigned long bitmap_pgshift) { unsigned long riova, rsize; - unsigned int min_pagesz; + unsigned int min_pagesz, rs, re, start, end; bool flush = false; - int rs, re, start, end; int ret = 0;
min_pagesz = 1 << __ffs(domain->pgsize_bitmap); @@ -3160,8 +3159,8 @@ int iommu_clear_dirty_log(struct iommu_domain *domain, end = start + (size >> bitmap_pgshift); bitmap_for_each_set_region(bitmap, rs, re, start, end) { flush = true; - riova = base_iova + (rs << bitmap_pgshift); - rsize = (re - rs) << bitmap_pgshift; + riova = base_iova + ((unsigned long)rs << bitmap_pgshift); + rsize = (unsigned long)(re - rs) << bitmap_pgshift; ret = __iommu_clear_dirty_log(domain, riova, rsize, bitmap, base_iova, bitmap_pgshift); if (ret) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index f556b572c86d..1422cbb37013 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1096,7 +1096,7 @@ static int vfio_iova_dirty_log_clear(u64 __user *bitmap, unsigned long bitmap_size; unsigned long *bitmap_buffer = NULL; bool clear_valid; - int rs, re, start, end, dma_offset; + unsigned int rs, re, start, end, dma_offset; int ret = 0;
bitmap_size = DIRTY_BITMAP_BYTES(size >> pgshift); @@ -1128,7 +1128,7 @@ static int vfio_iova_dirty_log_clear(u64 __user *bitmap, end = (end_iova - iova) >> pgshift; bitmap_for_each_set_region(bitmap_buffer, rs, re, start, end) { clear_valid = true; - riova = iova + (rs << pgshift); + riova = iova + ((unsigned long)rs << pgshift); dma_offset = (riova - dma->iova) >> pgshift; bitmap_clear(dma->bitmap, dma_offset, re - rs); }
From: Willy Tarreau w@1wt.eu
mainline inclusion from mainline-v5.18-rc6 commit b2d057560b8107c633b39aabe517ff9d93f285e3 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I57M5L CVE: CVE-2022-1012
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
SipHash replaced MD5 in secure_ipv{4,6}_port_ephemeral() via commit 7cd23e5300c1 ("secure_seq: use SipHash in place of MD5"), but the output remained truncated to 32-bit only. In order to exploit more bits from the hash, let's make the functions return the full 64-bit of siphash_3u32(). We also make sure the port offset calculation in __inet_hash_connect() remains done on 32-bit to avoid the need for div_u64_rem() and an extra cost on 32-bit systems.
Cc: Jason A. Donenfeld Jason@zx2c4.com Cc: Moshe Kol moshe.kol@mail.huji.ac.il Cc: Yossi Gilad yossi.gilad@mail.huji.ac.il Cc: Amit Klein aksecurity@gmail.com Reviewed-by: Eric Dumazet edumazet@google.com Signed-off-by: Willy Tarreau w@1wt.eu Signed-off-by: Jakub Kicinski kuba@kernel.org
Conflicts: net/ipv4/inet_hashtables.c
Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/net/inet_hashtables.h | 2 +- include/net/secure_seq.h | 4 ++-- net/core/secure_seq.c | 4 ++-- net/ipv4/inet_hashtables.c | 9 +++++---- net/ipv6/inet6_hashtables.c | 4 ++-- 5 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ca6a3ea9057e..d4d611064a76 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -419,7 +419,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) }
int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)); diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index d7d2495f83c2..dac91aa38c5a 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -4,8 +4,8 @@
#include <linux/types.h>
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); u32 secure_tcp_seq(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index b5bc680d4755..444cce0184c3 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -94,7 +94,7 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, } EXPORT_SYMBOL(secure_tcpv6_seq);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) { const struct { @@ -142,7 +142,7 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr, } EXPORT_SYMBOL_GPL(secure_tcp_seq);
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { net_secret_init(); return siphash_3u32((__force u32)saddr, (__force u32)daddr, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e093847c334d..fe74b45ae5b8 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -504,7 +504,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; }
-static u32 inet_sk_port_offset(const struct sock *sk) +static u64 inet_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk);
@@ -712,7 +712,7 @@ void inet_unhash(struct sock *sk) EXPORT_SYMBOL_GPL(inet_unhash);
int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)) { @@ -752,7 +752,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (likely(remaining > 1)) remaining &= ~1U;
- offset = (hint + port_offset) % remaining; + offset = hint + port_offset; + offset %= remaining; /* In first pass we try ports of @low parity. * inet_csk_get_port() does the opposite choice. */ @@ -829,7 +830,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0;
if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 67c9114835c8..c9e7ecc7afd3 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -308,7 +308,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; }
-static u32 inet6_sk_port_offset(const struct sock *sk) +static u64 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk);
@@ -320,7 +320,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk) int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0;
if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk);
From: Wei Li liwei391@huawei.com
hulk inclusion category: bugfix bugzilla: 186819 https://gitee.com/openeuler/kernel/issues/I58L00
--------------------------------
The gic_arch_restore_irqs() depends on commit e7e6a881f797 ("arm64: irqflags: fix incomplete save & restore"). While it is reverted in commit 2e680920c2e7 ("Revert "arm64: irqflags: fix incomplete save & restore""), we should update the logic here too.
Currently the upper 32 bits of 'flags' is always zero, change to use 'GIC_PRIO_PSR_I_SET' to indicate the interrupt enabling status.
Fixes: abde6b940f5a ("stop_machine: mask pseudo nmi before running the callback") Signed-off-by: Wei Li liwei391@huawei.com Reviewed-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/include/asm/arch_gicv3.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 12aced900ada..80714a8589a0 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -191,9 +191,8 @@ static inline void gic_arch_disable_irqs(void)
static inline void gic_arch_restore_irqs(unsigned long flags) { - if (gic_supports_nmi()) - asm volatile ("msr daif, %0" : : "r" (flags >> 32) - : "memory"); + if (gic_supports_nmi() && !(flags & GIC_PRIO_PSR_I_SET)) + gic_arch_enable_irqs(); } #endif /* __ASSEMBLY__ */ #endif /* __ASM_ARCH_GICV3_H */