From: Gaosheng Cui cuigaosheng1@huawei.com
stable inclusion from stable-v4.19.268 commit bbd0427fc0241bccb2c56235cbaad9fd63cd615b category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I69KBO CVE: NA
--------------------------------
[ Upstream commit 986d93f55bdeab1cac858d1e47b41fac10b2d7f6 ]
Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. The UBSAN warning calltrace like below:
UBSAN: shift-out-of-bounds in kernel/auditfilter.c:179:23 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: <TASK> dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c audit_register_class+0x9d/0x137 audit_classes_init+0x4d/0xb8 do_one_initcall+0x76/0x430 kernel_init_freeable+0x3b3/0x422 kernel_init+0x24/0x1e0 ret_from_fork+0x1f/0x30 </TASK>
Signed-off-by: Gaosheng Cui cuigaosheng1@huawei.com [PM: remove bad 'Fixes' tag as issue predates git, added in v2.6.6-rc1] Signed-off-by: Paul Moore paul@paul-moore.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- include/uapi/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 818ae690ab79..b163911b1d39 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -177,7 +177,7 @@ #define AUDIT_MAX_KEY_LEN 256 #define AUDIT_BITMASK_SIZE 64 #define AUDIT_WORD(nr) ((__u32)((nr)/32)) -#define AUDIT_BIT(nr) (1 << ((nr) - AUDIT_WORD(nr)*32)) +#define AUDIT_BIT(nr) (1U << ((nr) - AUDIT_WORD(nr)*32))
#define AUDIT_SYSCALL_CLASSES 16 #define AUDIT_CLASS_DIR_WRITE 0
From: Herbert Xu herbert@gondor.apana.org.au
stable inclusion from stable-v4.19.268 commit 0c76cb8dcaf2561c7acef006af29ab9709fe001c category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I69KBO CVE: NA
--------------------------------
[ Upstream commit 7f57f8165cb6d2c206e2b9ada53b9e2d6d8af42f ]
The function pfkey_send_acquire may race with pfkey_register (which could even be in a different name space). This may result in a buffer overrun.
Allocating the maximum amount of memory that could be used prevents this.
Reported-by: syzbot+1e9af9185d8850e2c2fa@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Reviewed-by: Sabrina Dubroca sd@queasysnail.net Reviewed-by: Eric Dumazet edumazet@google.com Signed-off-by: Steffen Klassert steffen.klassert@secunet.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/key/af_key.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-)
diff --git a/net/key/af_key.c b/net/key/af_key.c index 23470b0d84aa..6a170a37d9e2 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2911,7 +2911,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2929,7 +2929,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue;
- if (!(ealg_tmpl_set(t, ealg) && ealg->available)) + if (!(ealg_tmpl_set(t, ealg))) continue;
for (k = 1; ; k++) { @@ -2940,16 +2940,17 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue;
- if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } return sz + sizeof(struct sadb_prop); }
-static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) +static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; + int sz = 0; int i;
p = skb_put(skb, sizeof(struct sadb_prop)); @@ -2977,13 +2978,17 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; + sz += sizeof(*c); } } + + return sz + sizeof(*p); }
-static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) +static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; + int sz = 0; int i, k;
p = skb_put(skb, sizeof(struct sadb_prop)); @@ -3025,8 +3030,11 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; + sz += sizeof(*c); } } + + return sz + sizeof(*p); }
static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c) @@ -3156,6 +3164,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; + int alg_size = 0;
sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) @@ -3167,16 +3176,16 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct sizeof(struct sadb_x_policy);
if (x->id.proto == IPPROTO_AH) - size += count_ah_combs(t); + alg_size = count_ah_combs(t); else if (x->id.proto == IPPROTO_ESP) - size += count_esp_combs(t); + alg_size = count_esp_combs(t);
if ((xfrm_ctx = x->security)) { ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); size += sizeof(struct sadb_x_sec_ctx) + ctx_size; }
- skb = alloc_skb(size + 16, GFP_ATOMIC); + skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM;
@@ -3230,10 +3239,13 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_priority = xp->priority;
/* Set sadb_comb's. */ + alg_size = 0; if (x->id.proto == IPPROTO_AH) - dump_ah_combs(skb, t); + alg_size = dump_ah_combs(skb, t); else if (x->id.proto == IPPROTO_ESP) - dump_esp_combs(skb, t); + alg_size = dump_esp_combs(skb, t); + + hdr->sadb_msg_len += alg_size / 8;
/* security context */ if (xfrm_ctx) {
From: Yang Yingliang yangyingliang@huawei.com
stable inclusion from stable-v4.19.268 commit 171d5fcca1e38b719bfe9b05cf0675f37e1c9d86 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I69KBO CVE: NA
--------------------------------
[ Upstream commit 60d865bd5a9b15a3961eb1c08bd4155682a3c81e ]
In of_fwnode_get_reference_args(), the refcount of of_args.np has been incremented in the case of successful return from of_parse_phandle_with_args() or of_parse_phandle_with_fixed_args().
Decrement the refcount if of_args is not returned to the caller of of_fwnode_get_reference_args().
Fixes: 3e3119d3088f ("device property: Introduce fwnode_property_get_reference_args") Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Sakari Ailus sakari.ailus@linux.intel.com Reviewed-by: Frank Rowand frowand.list@gmail.com Link: https://lore.kernel.org/r/20221121023209.3909759-1-yangyingliang@huawei.com Signed-off-by: Rob Herring robh@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/of/property.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/of/property.c b/drivers/of/property.c index 43720c2de138..13c7e55f5cba 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -918,8 +918,10 @@ of_fwnode_get_reference_args(const struct fwnode_handle *fwnode, nargs, index, &of_args); if (ret < 0) return ret; - if (!args) + if (!args) { + of_node_put(of_args.np); return 0; + }
args->nargs = of_args.args_count; args->fwnode = of_fwnode_handle(of_args.np);
From: Shigeru Yoshida syoshida@redhat.com
stable inclusion from stable-v4.19.268 commit 1f23f1890d91812c35d32eab1b49621b6d32dc7b category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I69KBO CVE: NA
--------------------------------
[ Upstream commit 5daadc86f27ea4d691e2131c04310d0418c6cd12 ]
syzbot reported use-after-free in tun_detach() [1]. This causes call trace like below:
================================================================== BUG: KASAN: use-after-free in notifier_call_chain+0x1ee/0x200 kernel/notifier.c:75 Read of size 8 at addr ffff88807324e2a8 by task syz-executor.0/3673
CPU: 0 PID: 3673 Comm: syz-executor.0 Not tainted 6.1.0-rc5-syzkaller-00044-gcc675d22e422 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: <TASK> __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:284 [inline] print_report+0x15e/0x461 mm/kasan/report.c:395 kasan_report+0xbf/0x1f0 mm/kasan/report.c:495 notifier_call_chain+0x1ee/0x200 kernel/notifier.c:75 call_netdevice_notifiers_info+0x86/0x130 net/core/dev.c:1942 call_netdevice_notifiers_extack net/core/dev.c:1983 [inline] call_netdevice_notifiers net/core/dev.c:1997 [inline] netdev_wait_allrefs_any net/core/dev.c:10237 [inline] netdev_run_todo+0xbc6/0x1100 net/core/dev.c:10351 tun_detach drivers/net/tun.c:704 [inline] tun_chr_close+0xe4/0x190 drivers/net/tun.c:3467 __fput+0x27c/0xa90 fs/file_table.c:320 task_work_run+0x16f/0x270 kernel/task_work.c:179 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xb3d/0x2a30 kernel/exit.c:820 do_group_exit+0xd4/0x2a0 kernel/exit.c:950 get_signal+0x21b1/0x2440 kernel/signal.c:2858 arch_do_signal_or_restart+0x86/0x2300 arch/x86/kernel/signal.c:869 exit_to_user_mode_loop kernel/entry/common.c:168 [inline] exit_to_user_mode_prepare+0x15f/0x250 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296 do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd
The cause of the issue is that sock_put() from __tun_detach() drops last reference count for struct net, and then notifier_call_chain() from netdev_state_change() accesses that struct net.
This patch fixes the issue by calling sock_put() from tun_detach() after all necessary accesses for the struct net has done.
Fixes: 83c1f36f9880 ("tun: send netlink notification when the device is modified") Reported-by: syzbot+106f9b687cd64ee70cd1@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=96eb7f1ce75ef933697f24eeab928c4a716edef... [1] Signed-off-by: Shigeru Yoshida syoshida@redhat.com Link: https://lore.kernel.org/r/20221124175134.1589053-1-syoshida@redhat.com Signed-off-by: Paolo Abeni pabeni@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/net/tun.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 01de856c9774..240dd4523a7d 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -748,7 +748,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (tun) xdp_rxq_info_unreg(&tfile->xdp_rxq); ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); - sock_put(&tfile->sk); } }
@@ -764,6 +763,9 @@ static void tun_detach(struct tun_file *tfile, bool clean) if (dev) netdev_state_change(dev); rtnl_unlock(); + + if (clean) + sock_put(&tfile->sk); }
static void tun_detach_all(struct net_device *dev)
From: Willem de Bruijn willemb@google.com
stable inclusion from stable-v4.19.268 commit 36e4b80042d2cd724335e75df492682cc9096dec category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I69KBO CVE: NA
--------------------------------
[ Upstream commit b85f628aa158a653c006e9c1405a117baef8c868 ]
CHECKSUM_COMPLETE signals that skb->csum stores the sum over the entire packet. It does not imply that an embedded l4 checksum field has been validated.
Fixes: 682f048bd494 ("af_packet: pass checksum validation status to the user") Signed-off-by: Willem de Bruijn willemb@google.com Link: https://lore.kernel.org/r/20221128161812.640098-1-willemdebruijn.kernel@gmai... Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/packet/af_packet.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fded065abd3c..9dc974649a1a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2206,8 +2206,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) status |= TP_STATUS_CSUM_VALID;
if (snaplen > res) @@ -3446,8 +3445,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) aux.tp_status |= TP_STATUS_CSUM_VALID;
aux.tp_len = origlen;
From: James Morse james.morse@arm.com
stable inclusion from stable-v4.19.268 commit 70133fb6e3cebb7c7b295d848f3ab76ee22ab4dd category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I69KBO CVE: NA
--------------------------------
Sami reports that linux panic()s when resuming from suspend to RAM. This is because when CPUs are brought back online, they re-enable any necessary mitigations.
The Spectre-v2 and Spectre-BHB mitigations interact as both need to done by KVM when exiting a guest. Slots KVM can use as vectors are allocated, and templates for the mitigation are patched into the vector.
This fails if a new slot needs to be allocated once the kernel has finished booting as it is no-longer possible to modify KVM's vectors: | root@adam:/sys/devices/system/cpu/cpu1# echo 1 > online | Unable to handle kernel write to read-only memory at virtual add> | Mem abort info: | ESR = 0x9600004e | Exception class = DABT (current EL), IL = 32 bits | SET = 0, FnV = 0 | EA = 0, S1PTW = 0 | Data abort info: | ISV = 0, ISS = 0x0000004e | CM = 0, WnR = 1 | swapper pgtable: 4k pages, 48-bit VAs, pgdp = 000000000f07a71c | [ffff800000b4b800] pgd=00000009ffff8803, pud=00000009ffff7803, p> | Internal error: Oops: 9600004e [#1] PREEMPT SMP | Modules linked in: | Process swapper/1 (pid: 0, stack limit = 0x0000000063153c53) | CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.19.252-dirty #14 | Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno De> | pstate: 000001c5 (nzcv dAIF -PAN -UAO) | pc : __memcpy+0x48/0x180 | lr : __copy_hyp_vect_bpi+0x64/0x90
| Call trace: | __memcpy+0x48/0x180 | kvm_setup_bhb_slot+0x204/0x2a8 | spectre_bhb_enable_mitigation+0x1b8/0x1d0 | __verify_local_cpu_caps+0x54/0xf0 | check_local_cpu_capabilities+0xc4/0x184 | secondary_start_kernel+0xb0/0x170 | Code: b8404423 b80044c3 36180064 f8408423 (f80084c3) | ---[ end trace 859bcacb09555348 ]--- | Kernel panic - not syncing: Attempted to kill the idle task! | SMP: stopping secondary CPUs | Kernel Offset: disabled | CPU features: 0x10,25806086 | Memory Limit: none | ---[ end Kernel panic - not syncing: Attempted to kill the idle ]
This is only a problem on platforms where there is only one CPU that is vulnerable to both Spectre-v2 and Spectre-BHB.
The Spectre-v2 mitigation identifies the slot it can re-use by the CPU's 'fn'. It unconditionally writes the slot number and 'template_start' pointer. The Spectre-BHB mitigation identifies slots it can re-use by the CPU's template_start pointer, which was previously clobbered by the Spectre-v2 mitigation.
When there is only one CPU that is vulnerable to both issues, this causes Spectre-v2 to try to allocate a new slot, which fails.
Change both mitigations to check whether they are changing the slot this CPU uses before writing the percpu variables again.
This issue only exists in the stable backports for Spectre-BHB which have to use totally different infrastructure to mainline.
Reported-by: Sami Lee sami.lee@mediatek.com Fixes: c20d55174479 ("arm64: Mitigate spectre style branch history side channels") Signed-off-by: James Morse james.morse@arm.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- arch/arm64/kernel/cpu_errata.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index fdff9c6d8969..d7b4ed1f37f5 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -192,9 +192,12 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); }
- __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); - __this_cpu_write(bp_hardening_data.fn, fn); - __this_cpu_write(bp_hardening_data.template_start, hyp_vecs_start); + if (fn != __this_cpu_read(bp_hardening_data.fn)) { + __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); + __this_cpu_write(bp_hardening_data.fn, fn); + __this_cpu_write(bp_hardening_data.template_start, + hyp_vecs_start); + } spin_unlock(&bp_lock); } #else @@ -1263,8 +1266,11 @@ static void kvm_setup_bhb_slot(const char *hyp_vecs_start) __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); }
- __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); - __this_cpu_write(bp_hardening_data.template_start, hyp_vecs_start); + if (hyp_vecs_start != __this_cpu_read(bp_hardening_data.template_start)) { + __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); + __this_cpu_write(bp_hardening_data.template_start, + hyp_vecs_start); + } spin_unlock(&bp_lock); } #else
From: James Morse james.morse@arm.com
stable inclusion from stable-v4.19.268 commit 51febca41ae0333c29b4b12778f045a705d89370 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I69KBO CVE: NA
--------------------------------
Both the Spectre-v2 and Spectre-BHB mitigations involve running a sequence immediately after exiting a guest, before any branches. In the stable kernels these sequences are built by copying templates into an empty vector slot.
For Spectre-BHB, Cortex-A57 and A72 require the branchy loop with k=8. If Spectre-v2 needs mitigating at the same time, a firmware call to EL3 is needed. The work EL3 does at this point is also enough to mitigate Spectre-BHB.
When enabling the Spectre-BHB mitigation, spectre_bhb_enable_mitigation() should check if a slot has already been allocated for Spectre-v2, meaning no work is needed for Spectre-BHB.
This check was missed in the earlier backport, add it.
Fixes: c20d55174479 ("arm64: Mitigate spectre style branch history side channels") Signed-off-by: James Morse james.morse@arm.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- arch/arm64/kernel/cpu_errata.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d7b4ed1f37f5..afa72ccbf3ca 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -1306,7 +1306,13 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) { switch (spectre_bhb_loop_affected(SCOPE_SYSTEM)) { case 8: - kvm_setup_bhb_slot(__spectre_bhb_loop_k8_start); + /* + * A57/A72-r0 will already have selected the + * spectre-indirect vector, which is sufficient + * for BHB too. + */ + if (!__this_cpu_read(bp_hardening_data.fn)) + kvm_setup_bhb_slot(__spectre_bhb_loop_k8_start); break; case 24: kvm_setup_bhb_slot(__spectre_bhb_loop_k24_start);
From: Keith Busch kbusch@kernel.org
stable inclusion from stable-v4.19.268 commit 4722ac3b7809747cc116dc4355757d26e2e6e1fb category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I69KBO CVE: NA
--------------------------------
commit 23e085b2dead13b51fe86d27069895b740f749c0 upstream.
The passthrough commands already have this restriction, but the other operations do not. Require the same capabilities for all users as all of these operations, which include resets and rescans, can be disruptive.
Signed-off-by: Keith Busch kbusch@kernel.org Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Ovidiu Panait ovidiu.panait@windriver.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/nvme/host/core.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ccfdd4222697..b6e312187b51 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2838,11 +2838,17 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; dev_warn(ctrl->device, "resetting controller\n"); return nvme_reset_ctrl_sync(ctrl); case NVME_IOCTL_SUBSYS_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; return nvme_reset_subsystem(ctrl); case NVME_IOCTL_RESCAN: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; nvme_queue_scan(ctrl); return 0; default:
From: Michael Kelley mikelley@microsoft.com
stable inclusion from stable-v4.19.268 commit 6c30c4ec3b8670a3f0a985ad1c9a3ff744b3a190 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I69KBO CVE: NA
--------------------------------
[ Upstream commit 4dbd6a3e90e03130973688fd79e19425f720d999 ]
Current code re-calculates the size after aligning the starting and ending physical addresses on a page boundary. But the re-calculation also embeds the masking of high order bits that exceed the size of the physical address space (via PHYSICAL_PAGE_MASK). If the masking removes any high order bits, the size calculation results in a huge value that is likely to immediately fail.
Fix this by re-calculating the page-aligned size first. Then mask any high order bits using PHYSICAL_PAGE_MASK.
Fixes: ffa71f33a820 ("x86, ioremap: Fix incorrect physical address handling in PAE mode") Signed-off-by: Michael Kelley mikelley@microsoft.com Signed-off-by: Borislav Petkov bp@suse.de Acked-by: Dave Hansen dave.hansen@linux.intel.com Cc: stable@kernel.org Link: https://lore.kernel.org/r/1668624097-14884-2-git-send-email-mikelley@microso... Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- arch/x86/mm/ioremap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index e788c5899415..6d86b14363ce 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -171,9 +171,15 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, * Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; - phys_addr &= PHYSICAL_PAGE_MASK; + phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr;
+ /* + * Mask out any bits not part of the actual physical + * address, like memory encryption bits. + */ + phys_addr &= PHYSICAL_PAGE_MASK; + retval = reserve_memtype(phys_addr, (u64)phys_addr + size, pcm, &new_pcm); if (retval) {
From: Tejun Heo tj@kernel.org
stable inclusion from stable-v4.19.269 commit e1ae97624ecf400ea56c238bff23e5cd139df0b8 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I69KBO CVE: NA
--------------------------------
commit 4a7ba45b1a435e7097ca0f79a847d0949d0eb088 upstream.
memcg_write_event_control() accesses the dentry->d_name of the specified control fd to route the write call. As a cgroup interface file can't be renamed, it's safe to access d_name as long as the specified file is a regular cgroup file. Also, as these cgroup interface files can't be removed before the directory, it's safe to access the parent too.
Prior to 347c4a874710 ("memcg: remove cgroup_event->cft"), there was a call to __file_cft() which verified that the specified file is a regular cgroupfs file before further accesses. The cftype pointer returned from __file_cft() was no longer necessary and the commit inadvertently dropped the file type check with it allowing any file to slip through. With the invarients broken, the d_name and parent accesses can now race against renames and removals of arbitrary files and cause use-after-free's.
Fix the bug by resurrecting the file type check in __file_cft(). Now that cgroupfs is implemented through kernfs, checking the file operations needs to go through a layer of indirection. Instead, let's check the superblock and dentry type.
Link: https://lkml.kernel.org/r/Y5FRm/cfcKPGzWwl@slm.duckdns.org Fixes: 347c4a874710 ("memcg: remove cgroup_event->cft") Signed-off-by: Tejun Heo tj@kernel.org Reported-by: Jann Horn jannh@google.com Acked-by: Roman Gushchin roman.gushchin@linux.dev Acked-by: Johannes Weiner hannes@cmpxchg.org Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Michal Hocko mhocko@kernel.org Cc: Muchun Song songmuchun@bytedance.com Cc: Shakeel Butt shakeelb@google.com Cc: stable@vger.kernel.org [3.14+] Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- include/linux/cgroup.h | 1 + kernel/cgroup/cgroup-internal.h | 1 - mm/memcontrol.c | 15 +++++++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f2273649c31b..17a7718d000f 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -69,6 +69,7 @@ struct css_task_iter { struct list_head iters_node; /* css_set->task_iters */ };
+extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set;
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index c52883e5cb8a..2e65e4c4d6e7 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -144,7 +144,6 @@ extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; -extern struct file_system_type cgroup_fs_type;
/* iterate across the hierarchies */ #define for_each_root(root) \ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 345a9d159ad8..63b91a030b02 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4520,6 +4520,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, unsigned int efd, cfd; struct fd efile; struct fd cfile; + struct dentry *cdentry; const char *name; char *endp; int ret; @@ -4570,6 +4571,16 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, if (ret < 0) goto out_put_cfile;
+ /* + * The control file must be a regular cgroup1 file. As a regular cgroup + * file can't be renamed, it's safe to access its name afterwards. + */ + cdentry = cfile.file->f_path.dentry; + if (cdentry->d_sb->s_type != &cgroup_fs_type || !d_is_reg(cdentry)) { + ret = -EINVAL; + goto out_put_cfile; + } + /* * Determine the event callbacks and set them in @event. This used * to be done via struct cftype but cgroup core no longer knows @@ -4578,7 +4589,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, * * DO NOT ADD NEW FILES. */ - name = cfile.file->f_path.dentry->d_name.name; + name = cdentry->d_name.name;
if (!strcmp(name, "memory.usage_in_bytes")) { event->register_event = mem_cgroup_usage_register_event; @@ -4602,7 +4613,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, * automatically removed on cgroup destruction but the removal is * asynchronous, so take an extra ref on @css. */ - cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent, + cfile_css = css_tryget_online_from_dir(cdentry->d_parent, &memory_cgrp_subsys); ret = -EINVAL; if (IS_ERR(cfile_css))
From: Pankaj Raghav p.raghav@samsung.com
stable inclusion from stable-v4.19.269 commit e5d18854cb71cc0f5fc4076570e4840c41419d02 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I69KBO CVE: NA
--------------------------------
[ Upstream commit 6f2d71524bcfdeb1fcbd22a4a92a5b7b161ab224 ]
A device might have a core quirk for NVME_QUIRK_IGNORE_DEV_SUBNQN (such as Samsung X5) but it would still give a:
"missing or invalid SUBNQN field"
warning as core quirks are filled after calling nvme_init_subnqn. Fill ctrl->quirks from struct core_quirks before calling nvme_init_subsystem to fix this.
Tested on a Samsung X5.
Fixes: ab9e00cc72fa ("nvme: track subsystems") Signed-off-by: Pankaj Raghav p.raghav@samsung.com Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/nvme/host/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b6e312187b51..151ab076822b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2617,10 +2617,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (!ctrl->identified) { int i;
- ret = nvme_init_subsystem(ctrl, id); - if (ret) - goto out_free; - /* * Check for quirks. Quirk can depend on firmware version, * so, in principle, the set of quirks present can change @@ -2633,6 +2629,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (quirk_matches(id, &core_quirks[i])) ctrl->quirks |= core_quirks[i].quirks; } + + ret = nvme_init_subsystem(ctrl, id); + if (ret) + goto out_free; } memcpy(ctrl->subsys->firmware_rev, id->fr, sizeof(ctrl->subsys->firmware_rev));
From: Eric Dumazet edumazet@google.com
stable inclusion from stable-v4.19.269 commit 7e0dcd5f3ade221a6126278aca60c8ab4cc3bce9 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I69KBO CVE: NA
--------------------------------
[ Upstream commit 803e84867de59a1e5d126666d25eb4860cfd2ebe ]
Blamed commit claimed rcu_read_lock() was held by ip6_fragment() callers.
It seems to not be always true, at least for UDP stack.
syzbot reported:
BUG: KASAN: use-after-free in ip6_dst_idev include/net/ip6_fib.h:245 [inline] BUG: KASAN: use-after-free in ip6_fragment+0x2724/0x2770 net/ipv6/ip6_output.c:951 Read of size 8 at addr ffff88801d403e80 by task syz-executor.3/7618
CPU: 1 PID: 7618 Comm: syz-executor.3 Not tainted 6.1.0-rc6-syzkaller-00012-g4312098baf37 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: <TASK> __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:284 [inline] print_report+0x15e/0x45d mm/kasan/report.c:395 kasan_report+0xbf/0x1f0 mm/kasan/report.c:495 ip6_dst_idev include/net/ip6_fib.h:245 [inline] ip6_fragment+0x2724/0x2770 net/ipv6/ip6_output.c:951 __ip6_finish_output net/ipv6/ip6_output.c:193 [inline] ip6_finish_output+0x9a3/0x1170 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip6_output+0x1f1/0x540 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:445 [inline] ip6_local_out+0xb3/0x1a0 net/ipv6/output_core.c:161 ip6_send_skb+0xbb/0x340 net/ipv6/ip6_output.c:1966 udp_v6_send_skb+0x82a/0x18a0 net/ipv6/udp.c:1286 udp_v6_push_pending_frames+0x140/0x200 net/ipv6/udp.c:1313 udpv6_sendmsg+0x18da/0x2c80 net/ipv6/udp.c:1606 inet6_sendmsg+0x9d/0xe0 net/ipv6/af_inet6.c:665 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 sock_write_iter+0x295/0x3d0 net/socket.c:1108 call_write_iter include/linux/fs.h:2191 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9ed/0xdd0 fs/read_write.c:584 ksys_write+0x1ec/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fde3588c0d9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fde365b6168 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007fde359ac050 RCX: 00007fde3588c0d9 RDX: 000000000000ffdc RSI: 00000000200000c0 RDI: 000000000000000a RBP: 00007fde358e7ae9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fde35acfb1f R14: 00007fde365b6300 R15: 0000000000022000 </TASK>
Allocated by task 7618: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 __kasan_slab_alloc+0x82/0x90 mm/kasan/common.c:325 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slab.h:737 [inline] slab_alloc_node mm/slub.c:3398 [inline] slab_alloc mm/slub.c:3406 [inline] __kmem_cache_alloc_lru mm/slub.c:3413 [inline] kmem_cache_alloc+0x2b4/0x3d0 mm/slub.c:3422 dst_alloc+0x14a/0x1f0 net/core/dst.c:92 ip6_dst_alloc+0x32/0xa0 net/ipv6/route.c:344 ip6_rt_pcpu_alloc net/ipv6/route.c:1369 [inline] rt6_make_pcpu_route net/ipv6/route.c:1417 [inline] ip6_pol_route+0x901/0x1190 net/ipv6/route.c:2254 pol_lookup_func include/net/ip6_fib.h:582 [inline] fib6_rule_lookup+0x52e/0x6f0 net/ipv6/fib6_rules.c:121 ip6_route_output_flags_noref+0x2e6/0x380 net/ipv6/route.c:2625 ip6_route_output_flags+0x76/0x320 net/ipv6/route.c:2638 ip6_route_output include/net/ip6_route.h:98 [inline] ip6_dst_lookup_tail+0x5ab/0x1620 net/ipv6/ip6_output.c:1092 ip6_dst_lookup_flow+0x90/0x1d0 net/ipv6/ip6_output.c:1222 ip6_sk_dst_lookup_flow+0x553/0x980 net/ipv6/ip6_output.c:1260 udpv6_sendmsg+0x151d/0x2c80 net/ipv6/udp.c:1554 inet6_sendmsg+0x9d/0xe0 net/ipv6/af_inet6.c:665 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 __sys_sendto+0x23a/0x340 net/socket.c:2117 __do_sys_sendto net/socket.c:2129 [inline] __se_sys_sendto net/socket.c:2125 [inline] __x64_sys_sendto+0xe1/0x1b0 net/socket.c:2125 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd
Freed by task 7599: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2e/0x40 mm/kasan/generic.c:511 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x160/0x1c0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:177 [inline] slab_free_hook mm/slub.c:1724 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1750 slab_free mm/slub.c:3661 [inline] kmem_cache_free+0xee/0x5c0 mm/slub.c:3683 dst_destroy+0x2ea/0x400 net/core/dst.c:127 rcu_do_batch kernel/rcu/tree.c:2250 [inline] rcu_core+0x81f/0x1980 kernel/rcu/tree.c:2510 __do_softirq+0x1fb/0xadc kernel/softirq.c:571
Last potentially related work creation: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 __kasan_record_aux_stack+0xbc/0xd0 mm/kasan/generic.c:481 call_rcu+0x9d/0x820 kernel/rcu/tree.c:2798 dst_release net/core/dst.c:177 [inline] dst_release+0x7d/0xe0 net/core/dst.c:167 refdst_drop include/net/dst.h:256 [inline] skb_dst_drop include/net/dst.h:268 [inline] skb_release_head_state+0x250/0x2a0 net/core/skbuff.c:838 skb_release_all net/core/skbuff.c:852 [inline] __kfree_skb net/core/skbuff.c:868 [inline] kfree_skb_reason+0x151/0x4b0 net/core/skbuff.c:891 kfree_skb_list_reason+0x4b/0x70 net/core/skbuff.c:901 kfree_skb_list include/linux/skbuff.h:1227 [inline] ip6_fragment+0x2026/0x2770 net/ipv6/ip6_output.c:949 __ip6_finish_output net/ipv6/ip6_output.c:193 [inline] ip6_finish_output+0x9a3/0x1170 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip6_output+0x1f1/0x540 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:445 [inline] ip6_local_out+0xb3/0x1a0 net/ipv6/output_core.c:161 ip6_send_skb+0xbb/0x340 net/ipv6/ip6_output.c:1966 udp_v6_send_skb+0x82a/0x18a0 net/ipv6/udp.c:1286 udp_v6_push_pending_frames+0x140/0x200 net/ipv6/udp.c:1313 udpv6_sendmsg+0x18da/0x2c80 net/ipv6/udp.c:1606 inet6_sendmsg+0x9d/0xe0 net/ipv6/af_inet6.c:665 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 sock_write_iter+0x295/0x3d0 net/socket.c:1108 call_write_iter include/linux/fs.h:2191 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9ed/0xdd0 fs/read_write.c:584 ksys_write+0x1ec/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd
Second to last potentially related work creation: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 __kasan_record_aux_stack+0xbc/0xd0 mm/kasan/generic.c:481 call_rcu+0x9d/0x820 kernel/rcu/tree.c:2798 dst_release net/core/dst.c:177 [inline] dst_release+0x7d/0xe0 net/core/dst.c:167 refdst_drop include/net/dst.h:256 [inline] skb_dst_drop include/net/dst.h:268 [inline] __dev_queue_xmit+0x1b9d/0x3ba0 net/core/dev.c:4211 dev_queue_xmit include/linux/netdevice.h:3008 [inline] neigh_resolve_output net/core/neighbour.c:1552 [inline] neigh_resolve_output+0x51b/0x840 net/core/neighbour.c:1532 neigh_output include/net/neighbour.h:546 [inline] ip6_finish_output2+0x56c/0x1530 net/ipv6/ip6_output.c:134 __ip6_finish_output net/ipv6/ip6_output.c:195 [inline] ip6_finish_output+0x694/0x1170 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip6_output+0x1f1/0x540 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:445 [inline] NF_HOOK include/linux/netfilter.h:302 [inline] NF_HOOK include/linux/netfilter.h:296 [inline] mld_sendpack+0xa09/0xe70 net/ipv6/mcast.c:1820 mld_send_cr net/ipv6/mcast.c:2121 [inline] mld_ifc_work+0x720/0xdc0 net/ipv6/mcast.c:2653 process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 worker_thread+0x669/0x1090 kernel/workqueue.c:2436 kthread+0x2e8/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306
The buggy address belongs to the object at ffff88801d403dc0 which belongs to the cache ip6_dst_cache of size 240 The buggy address is located 192 bytes inside of 240-byte region [ffff88801d403dc0, ffff88801d403eb0)
The buggy address belongs to the physical page: page:ffffea00007500c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1d403 memcg:ffff888022f49c81 flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 ffffea0001ef6580 dead000000000002 ffff88814addf640 raw: 0000000000000000 00000000800c000c 00000001ffffffff ffff888022f49c81 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x112a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_HARDWALL), pid 3719, tgid 3719 (kworker/0:6), ts 136223432244, free_ts 136222971441 prep_new_page mm/page_alloc.c:2539 [inline] get_page_from_freelist+0x10b5/0x2d50 mm/page_alloc.c:4288 __alloc_pages+0x1cb/0x5b0 mm/page_alloc.c:5555 alloc_pages+0x1aa/0x270 mm/mempolicy.c:2285 alloc_slab_page mm/slub.c:1794 [inline] allocate_slab+0x213/0x300 mm/slub.c:1939 new_slab mm/slub.c:1992 [inline] ___slab_alloc+0xa91/0x1400 mm/slub.c:3180 __slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3279 slab_alloc_node mm/slub.c:3364 [inline] slab_alloc mm/slub.c:3406 [inline] __kmem_cache_alloc_lru mm/slub.c:3413 [inline] kmem_cache_alloc+0x31a/0x3d0 mm/slub.c:3422 dst_alloc+0x14a/0x1f0 net/core/dst.c:92 ip6_dst_alloc+0x32/0xa0 net/ipv6/route.c:344 icmp6_dst_alloc+0x71/0x680 net/ipv6/route.c:3261 mld_sendpack+0x5de/0xe70 net/ipv6/mcast.c:1809 mld_send_cr net/ipv6/mcast.c:2121 [inline] mld_ifc_work+0x720/0xdc0 net/ipv6/mcast.c:2653 process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 worker_thread+0x669/0x1090 kernel/workqueue.c:2436 kthread+0x2e8/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1459 [inline] free_pcp_prepare+0x65c/0xd90 mm/page_alloc.c:1509 free_unref_page_prepare mm/page_alloc.c:3387 [inline] free_unref_page+0x1d/0x4d0 mm/page_alloc.c:3483 __unfreeze_partials+0x17c/0x1a0 mm/slub.c:2586 qlink_free mm/kasan/quarantine.c:168 [inline] qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187 kasan_quarantine_reduce+0x184/0x210 mm/kasan/quarantine.c:294 __kasan_slab_alloc+0x66/0x90 mm/kasan/common.c:302 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slab.h:737 [inline] slab_alloc_node mm/slub.c:3398 [inline] kmem_cache_alloc_node+0x304/0x410 mm/slub.c:3443 __alloc_skb+0x214/0x300 net/core/skbuff.c:497 alloc_skb include/linux/skbuff.h:1267 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1191 [inline] netlink_sendmsg+0x9a6/0xe10 net/netlink/af_netlink.c:1896 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 __sys_sendto+0x23a/0x340 net/socket.c:2117 __do_sys_sendto net/socket.c:2129 [inline] __se_sys_sendto net/socket.c:2125 [inline] __x64_sys_sendto+0xe1/0x1b0 net/socket.c:2125 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd
Fixes: 1758fd4688eb ("ipv6: remove unnecessary dst_hold() in ip6_fragment()") Reported-by: syzbot+8c0ac31aa9681abb9e2d@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet edumazet@google.com Cc: Wei Wang weiwan@google.com Cc: Martin KaFai Lau kafai@fb.com Link: https://lore.kernel.org/r/20221206101351.2037285-1-edumazet@google.com Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/ipv6/ip6_output.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6fd1a4b61747..70820d049b92 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -734,6 +734,9 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+ /* We prevent @rt from being freed. */ + rcu_read_lock(); + for (;;) { /* Prepare header of the next frame, * before previous one went down. */ @@ -776,6 +779,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); + rcu_read_unlock(); return 0; }
@@ -783,6 +787,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); + rcu_read_unlock(); return err;
slow_path_clean:
From: Xia Fukun xiafukun@huawei.com
stable inclusion from stable-v4.19.269 commit a1504a9e205691940d44bf94ad33267eefea3b5e category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6994X CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=l...
--------------------------------
[ Upstream commit b52be557e24c47286738276121177a41f54e3b83 ]
When __do_semtimedop() goes to sleep because it has to wait for a semaphore value becoming zero or becoming bigger than some threshold, it links the on-stack sem_queue to the sem_array, then goes to sleep without holding a reference on the sem_array.
When __do_semtimedop() comes back out of sleep, one of two things must happen:
a) We prove that the on-stack sem_queue has been disconnected from the (possibly freed) sem_array, making it safe to return from the stack frame that the sem_queue exists in.
b) We stabilize our reference to the sem_array, lock the sem_array, and detach the sem_queue from the sem_array ourselves.
sem_array has RCU lifetime, so for case (b), the reference can be stabilized inside an RCU read-side critical section by locklessly checking whether the sem_queue is still connected to the sem_array.
However, the current code does the lockless check on sem_queue before starting an RCU read-side critical section, so the result of the lockless check immediately becomes useless.
Fix it by doing rcu_read_lock() before the lockless check. Now RCU ensures that if we observe the object being on our queue, the object can't be freed until rcu_read_unlock().
This bug is only hittable on kernel builds with full preemption support (either CONFIG_PREEMPT or PREEMPT_DYNAMIC with preempt=full).
Fixes: 370b262c896e ("ipc/sem: avoid idr tree lookup for interrupted semop") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn jannh@google.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Xia Fukun xiafukun@huawei.com Reviewed-by: ZhangQiao zhangqiao22@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- ipc/sem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/ipc/sem.c b/ipc/sem.c index 171881795a30..7c4595d1951c 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2149,6 +2149,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, * scenarios where we were awakened externally, during the * window between wake_q_add() and wake_up_q(). */ + rcu_read_lock(); error = READ_ONCE(queue.status); if (error != -EINTR) { /* @@ -2158,10 +2159,10 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, * overwritten by the previous owner of the semaphore. */ smp_mb(); + rcu_read_unlock(); goto out_free; }
- rcu_read_lock(); locknum = sem_lock(sma, sops, nsops);
if (!ipc_valid_object(&sma->sem_perm))