From: "Steven Rostedt (VMware)" rostedt@goodmis.org
stable inclusion from linux-4.19.191 commit 3d9281a4ac7171c808f9507f0937eb236b353905
--------------------------------
commit 8c9af478c06bb1ab1422f90d8ecbc53defd44bc3 upstream.
# echo switch_mm:traceoff > /sys/kernel/tracing/set_ftrace_filter
will cause switch_mm to stop tracing by the traceoff command.
# echo -n switch_mm:traceoff > /sys/kernel/tracing/set_ftrace_filter
does nothing.
The reason is that the parsing in the write function only processes commands if it finished parsing (there is white space written after the command). That's to handle:
write(fd, "switch_mm:", 10); write(fd, "traceoff", 8);
cases, where the command is broken over multiple writes.
The problem is if the file descriptor is closed, then the write call is not processed, and the command needs to be processed in the release code. The release code can handle matching of functions, but does not handle commands.
Cc: stable@vger.kernel.org Fixes: eda1e32855656 ("tracing: handle broken names in ftrace filter") Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/trace/ftrace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e214ef5003e46..b296e94de2eb1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5023,7 +5023,10 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
parser = &iter->parser; if (trace_parser_loaded(parser)) { - ftrace_match_records(iter->hash, parser->buffer, parser->idx); + int enable = !(iter->flags & FTRACE_ITER_NOTRACE); + + ftrace_process_regex(iter, parser->buffer, + parser->idx, enable); }
trace_parser_put(parser);
From: Paul Aurich paul@darkrain42.org
stable inclusion from linux-4.19.191 commit e486f8397f3f14a7cadc166138141fdb14379a54
--------------------------------
commit 83728cbf366e334301091d5b808add468ab46b27 upstream.
Avoid a warning if the error percolates back up:
[440700.376476] CIFS VFS: \otters.example.com crypt_message: Could not get encryption key [440700.386947] ------------[ cut here ]------------ [440700.386948] err = 1 [440700.386977] WARNING: CPU: 11 PID: 2733 at /build/linux-hwe-5.4-p6lk6L/linux-hwe-5.4-5.4.0/lib/errseq.c:74 errseq_set+0x5c/0x70 ... [440700.397304] CPU: 11 PID: 2733 Comm: tar Tainted: G OE 5.4.0-70-generic #78~18.04.1-Ubuntu ... [440700.397334] Call Trace: [440700.397346] __filemap_set_wb_err+0x1a/0x70 [440700.397419] cifs_writepages+0x9c7/0xb30 [cifs] [440700.397426] do_writepages+0x4b/0xe0 [440700.397444] __filemap_fdatawrite_range+0xcb/0x100 [440700.397455] filemap_write_and_wait+0x42/0xa0 [440700.397486] cifs_setattr+0x68b/0xf30 [cifs] [440700.397493] notify_change+0x358/0x4a0 [440700.397500] utimes_common+0xe9/0x1c0 [440700.397510] do_utimes+0xc5/0x150 [440700.397520] __x64_sys_utimensat+0x88/0xd0
Fixes: 61cfac6f267d ("CIFS: Fix possible use after free in demultiplex thread") Signed-off-by: Paul Aurich paul@darkrain42.org CC: stable@vger.kernel.org Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/cifs/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 9e3ee00d1a49b..b8d01c1234a47 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2671,7 +2671,7 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) } spin_unlock(&cifs_tcp_ses_lock);
- return 1; + return -EAGAIN; } /* * Encrypt or decrypt @rqst message. @rqst[0] has the following format:
From: Ard Biesheuvel ardb@kernel.org
stable inclusion from linux-4.19.191 commit 67afaf1a0a3b2e23645c86636d887a7ca84a09ed
--------------------------------
[ Upstream commit 83681f2bebb34dbb3f03fecd8f570308ab8b7c2c ]
Given that crypto_alloc_tfm() may return ERR pointers, and to avoid crashes on obscure error paths where such pointers are presented to crypto_destroy_tfm() (such as [0]), add an ERR_PTR check there before dereferencing the second argument as a struct crypto_tfm pointer.
[0] https://lore.kernel.org/linux-crypto/000000000000de949705bc59e0f6@google.com...
Reported-by: syzbot+12cf5fbfdeba210a89dd@syzkaller.appspotmail.com Reviewed-by: Eric Biggers ebiggers@google.com Signed-off-by: Ard Biesheuvel ardb@kernel.org Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- crypto/api.c | 2 +- include/crypto/acompress.h | 2 ++ include/crypto/aead.h | 2 ++ include/crypto/akcipher.h | 2 ++ include/crypto/hash.h | 4 ++++ include/crypto/kpp.h | 2 ++ include/crypto/rng.h | 2 ++ include/crypto/skcipher.h | 2 ++ 8 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/crypto/api.c b/crypto/api.c index 7aca9f86c5f39..318e56cdfccf5 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -572,7 +572,7 @@ void crypto_destroy_tfm(void *mem, struct crypto_tfm *tfm) { struct crypto_alg *alg;
- if (unlikely(!mem)) + if (IS_ERR_OR_NULL(mem)) return;
alg = tfm->__crt_alg; diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index e328b52425a85..1ff78365607cd 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -152,6 +152,8 @@ static inline struct crypto_acomp *crypto_acomp_reqtfm(struct acomp_req *req) * crypto_free_acomp() -- free ACOMPRESS tfm handle * * @tfm: ACOMPRESS tfm handle allocated with crypto_alloc_acomp() + * + * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_acomp(struct crypto_acomp *tfm) { diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 1e26f790b03fa..c69c545ba39a9 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -187,6 +187,8 @@ static inline struct crypto_tfm *crypto_aead_tfm(struct crypto_aead *tfm) /** * crypto_free_aead() - zeroize and free aead handle * @tfm: cipher handle to be freed + * + * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_aead(struct crypto_aead *tfm) { diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h index b5e11de4d497d..9817f2e5bff83 100644 --- a/include/crypto/akcipher.h +++ b/include/crypto/akcipher.h @@ -174,6 +174,8 @@ static inline struct crypto_akcipher *crypto_akcipher_reqtfm( * crypto_free_akcipher() - free AKCIPHER tfm handle * * @tfm: AKCIPHER tfm handle allocated with crypto_alloc_akcipher() + * + * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_akcipher(struct crypto_akcipher *tfm) { diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 76e432cab75db..552517dcf9e4f 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -257,6 +257,8 @@ static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) /** * crypto_free_ahash() - zeroize and free the ahash handle * @tfm: cipher handle to be freed + * + * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_ahash(struct crypto_ahash *tfm) { @@ -692,6 +694,8 @@ static inline struct crypto_tfm *crypto_shash_tfm(struct crypto_shash *tfm) /** * crypto_free_shash() - zeroize and free the message digest handle * @tfm: cipher handle to be freed + * + * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_shash(struct crypto_shash *tfm) { diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index 1bde0a6514fa6..1a34630fc3718 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -159,6 +159,8 @@ static inline void crypto_kpp_set_flags(struct crypto_kpp *tfm, u32 flags) * crypto_free_kpp() - free KPP tfm handle * * @tfm: KPP tfm handle allocated with crypto_alloc_kpp() + * + * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_kpp(struct crypto_kpp *tfm) { diff --git a/include/crypto/rng.h b/include/crypto/rng.h index b95ede354a665..a788c1e5a1217 100644 --- a/include/crypto/rng.h +++ b/include/crypto/rng.h @@ -116,6 +116,8 @@ static inline struct rng_alg *crypto_rng_alg(struct crypto_rng *tfm) /** * crypto_free_rng() - zeroize and free RNG handle * @tfm: cipher handle to be freed + * + * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_rng(struct crypto_rng *tfm) { diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index 2f327f090c3e9..c7553f8b1bb69 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -206,6 +206,8 @@ static inline struct crypto_tfm *crypto_skcipher_tfm( /** * crypto_free_skcipher() - zeroize and free cipher handle * @tfm: cipher handle to be freed + * + * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_skcipher(struct crypto_skcipher *tfm) {
From: Vitaly Kuznetsov vkuznets@redhat.com
stable inclusion from linux-4.19.191 commit 873792c84237953385d99ff3c56ed9e467c15300
--------------------------------
[ Upstream commit c93a5e20c3c2dabef8ea360a3d3f18c6f68233ab ]
When irq_matrix_free() is called for an unallocated vector the managed_allocated and total_allocated counters get out of sync with the real state of the matrix. Later, when the last interrupt is freed, these counters will underflow resulting in UINTMAX because the counters are unsigned.
While this is certainly a problem of the calling code, this can be catched in the allocator by checking the allocation bit for the to be freed vector which simplifies debugging.
An example of the problem described above: https://lore.kernel.org/lkml/20210318192819.636943062@linutronix.de/
Add the missing sanity check and emit a warning when it triggers.
Suggested-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Vitaly Kuznetsov vkuznets@redhat.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Link: https://lore.kernel.org/r/20210319111823.1105248-1-vkuznets@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/irq/matrix.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 30cc217b86318..0a5b2e26fd544 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -416,7 +416,9 @@ void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end)) return;
- clear_bit(bit, cm->alloc_map); + if (WARN_ON_ONCE(!test_and_clear_bit(bit, cm->alloc_map))) + return; + cm->allocated--; if(managed) cm->managed_allocated--;
From: Robin Murphy robin.murphy@arm.com
stable inclusion from linux-4.19.191 commit 213a9a13f0735212410312f61902a40ff065c477
--------------------------------
[ Upstream commit e338cb6bef254821a8c095018fd27254d74bfd6a ]
If we're aborting after failing to register the PMU device, we probably don't want to leak the IRQs that we've claimed.
Signed-off-by: Robin Murphy robin.murphy@arm.com Link: https://lore.kernel.org/r/53031a607fc8412a60024bfb3bb8cd7141f998f5.161677456... Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/perf/arm_pmu_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 96075cecb0aec..199293450acfc 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -236,7 +236,7 @@ int arm_pmu_device_probe(struct platform_device *pdev,
ret = armpmu_register(pmu); if (ret) - goto out_free; + goto out_free_irqs;
return 0;
From: Bill Wendling morbo@google.com
stable inclusion from linux-4.19.191 commit 44149b3e106e4c632d4c5b80650580f823f72c45
--------------------------------
[ Upstream commit 388708028e6937f3fc5fc19aeeb847f8970f489c ]
The arm64 assembler in binutils 2.32 and above generates a program property note in a note section, .note.gnu.property, to encode used x86 ISAs and features. But the kernel linker script only contains a single NOTE segment:
PHDRS { text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ note PT_NOTE FLAGS(4); /* PF_R */ }
The NOTE segment generated by the vDSO linker script is aligned to 4 bytes. But the .note.gnu.property section must be aligned to 8 bytes on arm64.
$ readelf -n vdso64.so
Displaying notes found in: .note Owner Data size Description Linux 0x00000004 Unknown note type: (0x00000000) description data: 06 00 00 00 readelf: Warning: note with invalid namesz and/or descsz found at offset 0x20 readelf: Warning: type: 0x78, namesize: 0x00000100, descsize: 0x756e694c, alignment: 8
Since the note.gnu.property section in the vDSO is not checked by the dynamic linker, discard the .note.gnu.property sections in the vDSO.
Similar to commit 4caffe6a28d31 ("x86/vdso: Discard .note.gnu.property sections in vDSO"), but for arm64.
Signed-off-by: Bill Wendling morbo@google.com Reviewed-by: Kees Cook keescook@chromium.org Acked-by: Ard Biesheuvel ardb@kernel.org Link: https://lore.kernel.org/r/20210423205159.830854-1-morbo@google.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/kernel/vdso/vdso.lds.S | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index beca249bc2f39..b3e6c4d5b75c8 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -39,6 +39,13 @@ SECTIONS .gnu.version_d : { *(.gnu.version_d) } .gnu.version_r : { *(.gnu.version_r) }
+ /* + * Discard .note.gnu.property sections which are unused and have + * different alignment requirement from vDSO note sections. + */ + /DISCARD/ : { + *(.note.GNU-stack .note.gnu.property) + } .note : { *(.note.*) } :text :note
. = ALIGN(16); @@ -59,7 +66,6 @@ SECTIONS PROVIDE(end = .);
/DISCARD/ : { - *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) }
From: Marc Zyngier maz@kernel.org
stable inclusion from linux-4.19.191 commit 7b2162db1498c71962a4bb2f776fa4e76d4d305b
--------------------------------
commit 1ecd5b129252249b9bc03d7645a7bda512747277 upstream.
When failing the driver probe because of invalid firmware properties, the GTDT driver unmaps the interrupt that it mapped earlier.
However, it never checks whether the mapping of the interrupt actially succeeded. Even more, should the firmware report an illegal interrupt number that overlaps with the GIC SGI range, this can result in an IPI being unmapped, and subsequent fireworks (as reported by Dann Frazier).
Rework the driver to have a slightly saner behaviour and actually check whether the interrupt has been mapped before unmapping things.
Reported-by: dann frazier dann.frazier@canonical.com Fixes: ca9ae5ec4ef0 ("acpi/arm64: Add SBSA Generic Watchdog support in GTDT driver") Signed-off-by: Marc Zyngier maz@kernel.org Link: https://lore.kernel.org/r/YH87dtTfwYgavusz@xps13.dannf Cc: stable@vger.kernel.org Cc: Fu Wei wefu@redhat.com Reviewed-by: Sudeep Holla sudeep.holla@arm.com Tested-by: dann frazier dann.frazier@canonical.com Tested-by: Hanjun Guo guohanjun@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Reviewed-by: Lorenzo Pieralisi lorenzo.pieralisi@arm.com Link: https://lore.kernel.org/r/20210421164317.1718831-2-maz@kernel.org Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/acpi/arm64/gtdt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c index 92f9edf9d11ed..c39b36c558d6f 100644 --- a/drivers/acpi/arm64/gtdt.c +++ b/drivers/acpi/arm64/gtdt.c @@ -332,7 +332,7 @@ static int __init gtdt_import_sbsa_gwdt(struct acpi_gtdt_watchdog *wd, int index) { struct platform_device *pdev; - int irq = map_gt_gsi(wd->timer_interrupt, wd->timer_flags); + int irq;
/* * According to SBSA specification the size of refresh and control @@ -341,7 +341,7 @@ static int __init gtdt_import_sbsa_gwdt(struct acpi_gtdt_watchdog *wd, struct resource res[] = { DEFINE_RES_MEM(wd->control_frame_address, SZ_4K), DEFINE_RES_MEM(wd->refresh_frame_address, SZ_4K), - DEFINE_RES_IRQ(irq), + {}, }; int nr_res = ARRAY_SIZE(res);
@@ -351,10 +351,11 @@ static int __init gtdt_import_sbsa_gwdt(struct acpi_gtdt_watchdog *wd,
if (!(wd->refresh_frame_address && wd->control_frame_address)) { pr_err(FW_BUG "failed to get the Watchdog base address.\n"); - acpi_unregister_gsi(wd->timer_interrupt); return -EINVAL; }
+ irq = map_gt_gsi(wd->timer_interrupt, wd->timer_flags); + res[2] = (struct resource)DEFINE_RES_IRQ(irq); if (irq <= 0) { pr_warn("failed to map the Watchdog interrupt.\n"); nr_res--; @@ -367,7 +368,8 @@ static int __init gtdt_import_sbsa_gwdt(struct acpi_gtdt_watchdog *wd, */ pdev = platform_device_register_simple("sbsa-gwdt", index, res, nr_res); if (IS_ERR(pdev)) { - acpi_unregister_gsi(wd->timer_interrupt); + if (irq > 0) + acpi_unregister_gsi(wd->timer_interrupt); return PTR_ERR(pdev); }
From: Trond Myklebust trond.myklebust@hammerspace.com
stable inclusion from linux-4.19.191 commit d51316b13dd4f35aa737d20822a18c131eed32c3
--------------------------------
commit 39fd01863616964f009599e50ca5c6ea9ebf88d6 upstream.
If the pNFS layout segment is marked with the NFS_LSEG_LAYOUTRETURN flag, then the assumption is that it has some reporting requirement to perform through a layoutreturn (e.g. flexfiles layout stats or error information).
Fixes: e0b7d420f72a ("pNFS: Don't discard layout segments that are marked for return") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/pnfs.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 55965e8e9a2ed..6feca545873d6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2359,6 +2359,9 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
assert_spin_locked(&lo->plh_inode->i_lock);
+ if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) + tmp_list = &lo->plh_return_segs; + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) if (pnfs_match_lseg_recall(lseg, return_range, seq)) { dprintk("%s: marking lseg %p iomode %d " @@ -2366,6 +2369,8 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, lseg->pls_range.length); + if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + tmp_list = &lo->plh_return_segs; if (mark_lseg_invalid(lseg, tmp_list)) continue; remaining++;
From: Trond Myklebust trond.myklebust@hammerspace.com
stable inclusion from linux-4.19.191 commit 9ffa7967f9379a0a1b924e9ffeda709d72237da7
--------------------------------
commit de144ff4234f935bd2150108019b5d87a90a8a96 upstream.
If the pNFS layout segment is marked with the NFS_LSEG_LAYOUTRETURN flag, then the assumption is that it has some reporting requirement to perform through a layoutreturn (e.g. flexfiles layout stats or error information).
Fixes: 6d597e175012 ("pnfs: only tear down lsegs that precede seqid in LAYOUTRETURN args") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6feca545873d6..af255109c5bf9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1294,7 +1294,7 @@ _pnfs_return_layout(struct inode *ino) } valid_layout = pnfs_layout_is_valid(lo); pnfs_clear_layoutcommit(ino, &tmp_list); - pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); + pnfs_mark_matching_lsegs_return(lo, &tmp_list, NULL, 0);
if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { struct pnfs_layout_range range = {
From: Paul Clements paul.clements@us.sios.com
stable inclusion from linux-4.19.191 commit a6e17cab00fc5bf85472434c52ac751426257c6f
--------------------------------
commit 2417b9869b81882ab90fd5ed1081a1cb2d4db1dd upstream.
This patch addresses a data corruption bug in raid1 arrays using bitmaps. Without this fix, the bitmap bits for the failed I/O end up being cleared.
Since we are in the failure leg of raid1_end_write_request, the request either needs to be retried (R1BIO_WriteError) or failed (R1BIO_Degraded).
Fixes: eeba6809d8d5 ("md/raid1: end bio when the device faulty") Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Paul Clements paul.clements@us.sios.com Signed-off-by: Song Liu song@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/raid1.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index db0ec73993e80..cfc23bf440f84 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -445,6 +445,8 @@ static void raid1_end_write_request(struct bio *bio) if (!test_bit(Faulty, &rdev->flags)) set_bit(R1BIO_WriteError, &r1_bio->state); else { + /* Fail the request */ + set_bit(R1BIO_Degraded, &r1_bio->state); /* Finished with this branch */ r1_bio->bios[mirror] = NULL; to_put = bio;
From: Heinz Mauelshagen heinzm@redhat.com
stable inclusion from linux-4.19.191 commit 514666bb548d39b7a63814858f1be17becb49461
--------------------------------
commit f99a8e4373eeacb279bc9696937a55adbff7a28a upstream.
If fast table reloads occur during an ongoing reshape of raid4/5/6 devices the target may race reading a superblock vs the the MD resync thread; causing an inconclusive reshape state to be read in its constructor.
lvm2 test lvconvert-raid-reshape-stripes-load-reload.sh can cause BUG_ON() to trigger in md_run(), e.g.: "kernel BUG at drivers/md/raid5.c:7567!".
Scenario triggering the bug:
1. the MD sync thread calls end_reshape() from raid5_sync_request() when done reshaping. However end_reshape() _only_ updates the reshape position to MaxSector keeping the changed layout configuration though (i.e. any delta disks, chunk sector or RAID algorithm changes). That inconclusive configuration is stored in the superblock.
2. dm-raid constructs a mapping, loading named inconsistent superblock as of step 1 before step 3 is able to finish resetting the reshape state completely, and calls md_run() which leads to mentioned bug in raid5.c.
3. the MD RAID personality's finish_reshape() is called; which resets the reshape information on chunk sectors, delta disks, etc. This explains why the bug is rarely seen on multi-core machines, as MD's finish_reshape() superblock update races with the dm-raid constructor's superblock load in step 2.
Fix identifies inconclusive superblock content in the dm-raid constructor and resets it before calling md_run(), factoring out identifying checks into rs_is_layout_change() to share in existing rs_reshape_requested() and new rs_reset_inclonclusive_reshape(). Also enhance a comment and remove an empty line.
Cc: stable@vger.kernel.org Signed-off-by: Heinz Mauelshagen heinzm@redhat.com Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/dm-raid.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-)
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 23de59a692c51..b163329172208 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1892,6 +1892,14 @@ static bool rs_takeover_requested(struct raid_set *rs) return rs->md.new_level != rs->md.level; }
+/* True if layout is set to reshape. */ +static bool rs_is_layout_change(struct raid_set *rs, bool use_mddev) +{ + return (use_mddev ? rs->md.delta_disks : rs->delta_disks) || + rs->md.new_layout != rs->md.layout || + rs->md.new_chunk_sectors != rs->md.chunk_sectors; +} + /* True if @rs is requested to reshape by ctr */ static bool rs_reshape_requested(struct raid_set *rs) { @@ -1904,9 +1912,7 @@ static bool rs_reshape_requested(struct raid_set *rs) if (rs_is_raid0(rs)) return false;
- change = mddev->new_layout != mddev->layout || - mddev->new_chunk_sectors != mddev->chunk_sectors || - rs->delta_disks; + change = rs_is_layout_change(rs, false);
/* Historical case to support raid1 reshape without delta disks */ if (rs_is_raid1(rs)) { @@ -2843,7 +2849,7 @@ static sector_t _get_reshape_sectors(struct raid_set *rs) }
/* - * + * Reshape: * - change raid layout * - change chunk size * - add disks @@ -2952,6 +2958,20 @@ static int rs_setup_reshape(struct raid_set *rs) return r; }
+/* + * If the md resync thread has updated superblock with max reshape position + * at the end of a reshape but not (yet) reset the layout configuration + * changes -> reset the latter. + */ +static void rs_reset_inconclusive_reshape(struct raid_set *rs) +{ + if (!rs_is_reshaping(rs) && rs_is_layout_change(rs, true)) { + rs_set_cur(rs); + rs->md.delta_disks = 0; + rs->md.reshape_backwards = 0; + } +} + /* * Enable/disable discard support on RAID set depending on * RAID level and discard properties of underlying RAID members. @@ -3221,11 +3241,14 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto bad;
+ /* Catch any inconclusive reshape superblock content. */ + rs_reset_inconclusive_reshape(rs); + /* Start raid set read-only and assumed clean to change in raid_resume() */ rs->md.ro = 1; rs->md.in_sync = 1;
- /* Keep array frozen */ + /* Keep array frozen until resume. */ set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
/* Has to be held on running the array */ @@ -3239,7 +3262,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) }
r = md_start(&rs->md); - if (r) { ti->error = "Failed to start raid array"; mddev_unlock(&rs->md);
From: Thomas Gleixner tglx@linutronix.de
stable inclusion from linux-4.19.191 commit f724a7453bbfb8bb917735e315d5d902c0b67c33
--------------------------------
commit 4fbf5d6837bf81fd7a27d771358f4ee6c4f243f8 upstream.
The FUTEX_WAIT operand has historically a relative timeout which means that the clock id is irrelevant as relative timeouts on CLOCK_REALTIME are not subject to wall clock changes and therefore are mapped by the kernel to CLOCK_MONOTONIC for simplicity.
If a caller would set FUTEX_CLOCK_REALTIME for FUTEX_WAIT the timeout is still treated relative vs. CLOCK_MONOTONIC and then the wait arms that timeout based on CLOCK_REALTIME which is broken and obviously has never been used or even tested.
Reject any attempt to use FUTEX_CLOCK_REALTIME with FUTEX_WAIT again.
The desired functionality can be achieved with FUTEX_WAIT_BITSET and a FUTEX_BITSET_MATCH_ANY argument.
Fixes: 337f13046ff0 ("futex: Allow FUTEX_CLOCK_REALTIME with FUTEX_WAIT op") Signed-off-by: Thomas Gleixner tglx@linutronix.de Acked-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210422194704.834797921@linutronix.de Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/futex.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c index eeb1ac8b5bc6e..13574134bbc44 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3872,8 +3872,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
if (op & FUTEX_CLOCK_REALTIME) { flags |= FLAGS_CLOCKRT; - if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET && \ - cmd != FUTEX_WAIT_REQUEUE_PI) + if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) return -ENOSYS; }
From: Chen Jun chenjun102@huawei.com
stable inclusion from linux-4.19.191 commit 1a852780d9d93378ae4f7d7d57213bc114c6873f
--------------------------------
commit 2d036dfa5f10df9782f5278fc591d79d283c1fad upstream.
The return value on success (>= 0) is overwritten by the return value of put_old_timex32(). That works correct in the fault case, but is wrong for the success case where put_old_timex32() returns 0.
Just check the return value of put_old_timex32() and return -EFAULT in case it is not zero.
[ tglx: Massage changelog ]
Fixes: 3a4d44b61625 ("ntp: Move adjtimex related compat syscalls to native counterparts") Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Reviewed-by: Richard Cochran richardcochran@gmail.com Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210414030449.90692-1-chenjun102@huawei.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/time/posix-timers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 977bb6b78b0cc..b0bda93c595d8 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1174,8 +1174,8 @@ COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
err = kc->clock_adj(which_clock, &ktx);
- if (err >= 0) - err = compat_put_timex(utp, &ktx); + if (err >= 0 && compat_put_timex(utp, &ktx)) + return -EFAULT;
return err; }
From: Fengnan Chang changfengnan@vivo.com
stable inclusion from linux-4.19.191 commit 353b8d3bd324ecc3097d09d8c405d25e0f75ec7f
--------------------------------
commit f88f1466e2a2e5ca17dfada436d3efa1b03a3972 upstream.
We should set the error code when ext4_commit_super check argument failed. Found in code review. Fixes: c4be0c1dc4cdc ("filesystem freeze: add error handling of write_super_lockfs/unlockfs").
Cc: stable@kernel.org Signed-off-by: Fengnan Chang changfengnan@vivo.com Reviewed-by: Andreas Dilger adilger@dilger.ca Link: https://lore.kernel.org/r/20210402101631.561-1-changfengnan@vivo.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/ext4/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fcb1f5d3b09ec..a964ed63601c6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5247,8 +5247,10 @@ static int ext4_commit_super(struct super_block *sb) struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; int error = 0;
- if (!sbh || block_device_ejected(sb)) - return error; + if (!sbh) + return -EINVAL; + if (block_device_ejected(sb)) + return -ENODEV;
ext4_update_super(sb);
From: Pavel Skripkin paskripkin@gmail.com
stable inclusion from linux-4.19.191 commit 3ce3062bcf01de94815605f1b6fd916cb84ff08b
--------------------------------
commit 211b4d42b70f1c1660feaa968dac0efc2a96ac4d upstream.
syzbot reported memory leak in tty/vt. The problem was in VT_DISALLOCATE ioctl cmd. After allocating unimap with PIO_UNIMAP it wasn't freed via VT_DISALLOCATE, but vc_cons[currcons].d was zeroed.
Reported-by: syzbot+bcc922b19ccc64240b42@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin paskripkin@gmail.com Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20210327214443.21548-1-paskripkin@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/tty/vt/vt.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 13ea0579f104c..1fad4978a3b41 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1380,6 +1380,7 @@ struct vc_data *vc_deallocate(unsigned int currcons) atomic_notifier_call_chain(&vt_notifier_list, VT_DEALLOCATE, ¶m); vcs_remove_sysfs(currcons); visual_deinit(vc); + con_free_unimap(vc); put_pid(vc->vt_pid); vc_uniscr_set(vc, NULL); kfree(vc->vc_screenbuf);
From: "Steven Rostedt (VMware)" rostedt@goodmis.org
stable inclusion from linux-4.19.191 commit 92c52762c295602b2606181ff46e0de67a6e1417
--------------------------------
commit 785e3c0a3a870e72dc530856136ab4c8dd207128 upstream.
The default max PID is set by PID_MAX_DEFAULT, and the tracing infrastructure uses this number to map PIDs to the comm names of the tasks, such output of the trace can show names from the recorded PIDs in the ring buffer. This mapping is also exported to user space via the "saved_cmdlines" file in the tracefs directory.
But currently the mapping expects the PIDs to be less than PID_MAX_DEFAULT, which is the default maximum and not the real maximum. Recently, systemd will increases the maximum value of a PID on the system, and when tasks are traced that have a PID higher than PID_MAX_DEFAULT, its comm is not recorded. This leads to the entire trace to have "<...>" as the comm name, which is pretty useless.
Instead, keep the array mapping the size of PID_MAX_DEFAULT, but instead of just mapping the index to the comm, map a mask of the PID (PID_MAX_DEFAULT - 1) to the comm, and find the full PID from the map_cmdline_to_pid array (that already exists).
This bug goes back to the beginning of ftrace, but hasn't been an issue until user space started increasing the maximum value of PIDs.
Link: https://lkml.kernel.org/r/20210427113207.3c601884@gandalf.local.home
Cc: stable@vger.kernel.org Fixes: bc0c38d139ec7 ("ftrace: latency tracer infrastructure") Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/trace/trace.c | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ee2f94222863c..cec7e588c9a94 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1933,14 +1933,13 @@ static void tracing_stop_tr(struct trace_array *tr)
static int trace_save_cmdline(struct task_struct *tsk) { - unsigned pid, idx; + unsigned tpid, idx;
/* treat recording of idle task as a success */ if (!tsk->pid) return 1;
- if (unlikely(tsk->pid > PID_MAX_DEFAULT)) - return 0; + tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
/* * It's not the end of the world if we don't get @@ -1951,26 +1950,15 @@ static int trace_save_cmdline(struct task_struct *tsk) if (!arch_spin_trylock(&trace_cmdline_lock)) return 0;
- idx = savedcmd->map_pid_to_cmdline[tsk->pid]; + idx = savedcmd->map_pid_to_cmdline[tpid]; if (idx == NO_CMDLINE_MAP) { idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
- /* - * Check whether the cmdline buffer at idx has a pid - * mapped. We are going to overwrite that entry so we - * need to clear the map_pid_to_cmdline. Otherwise we - * would read the new comm for the old pid. - */ - pid = savedcmd->map_cmdline_to_pid[idx]; - if (pid != NO_CMDLINE_MAP) - savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; - - savedcmd->map_cmdline_to_pid[idx] = tsk->pid; - savedcmd->map_pid_to_cmdline[tsk->pid] = idx; - + savedcmd->map_pid_to_cmdline[tpid] = idx; savedcmd->cmdline_idx = idx; }
+ savedcmd->map_cmdline_to_pid[idx] = tsk->pid; set_cmdline(idx, tsk->comm);
arch_spin_unlock(&trace_cmdline_lock); @@ -1981,6 +1969,7 @@ static int trace_save_cmdline(struct task_struct *tsk) static void __trace_find_cmdline(int pid, char comm[]) { unsigned map; + int tpid;
if (!pid) { strcpy(comm, "<idle>"); @@ -1992,16 +1981,16 @@ static void __trace_find_cmdline(int pid, char comm[]) return; }
- if (pid > PID_MAX_DEFAULT) { - strcpy(comm, "<...>"); - return; + tpid = pid & (PID_MAX_DEFAULT - 1); + map = savedcmd->map_pid_to_cmdline[tpid]; + if (map != NO_CMDLINE_MAP) { + tpid = savedcmd->map_cmdline_to_pid[map]; + if (tpid == pid) { + strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); + return; + } } - - map = savedcmd->map_pid_to_cmdline[pid]; - if (map != NO_CMDLINE_MAP) - strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); - else - strcpy(comm, "<...>"); + strcpy(comm, "<...>"); }
void trace_find_cmdline(int pid, char comm[])
From: "Steven Rostedt (VMware)" rostedt@goodmis.org
stable inclusion from linux-4.19.191 commit d43d56dbf452ccecc1ec735cd4b6840118005d7c
--------------------------------
commit aafe104aa9096827a429bc1358f8260ee565b7cc upstream.
It was reported that a fix to the ring buffer recursion detection would cause a hung machine when performing suspend / resume testing. The following backtrace was extracted from debugging that case:
Call Trace: trace_clock_global+0x91/0xa0 __rb_reserve_next+0x237/0x460 ring_buffer_lock_reserve+0x12a/0x3f0 trace_buffer_lock_reserve+0x10/0x50 __trace_graph_return+0x1f/0x80 trace_graph_return+0xb7/0xf0 ? trace_clock_global+0x91/0xa0 ftrace_return_to_handler+0x8b/0xf0 ? pv_hash+0xa0/0xa0 return_to_handler+0x15/0x30 ? ftrace_graph_caller+0xa0/0xa0 ? trace_clock_global+0x91/0xa0 ? __rb_reserve_next+0x237/0x460 ? ring_buffer_lock_reserve+0x12a/0x3f0 ? trace_event_buffer_lock_reserve+0x3c/0x120 ? trace_event_buffer_reserve+0x6b/0xc0 ? trace_event_raw_event_device_pm_callback_start+0x125/0x2d0 ? dpm_run_callback+0x3b/0xc0 ? pm_ops_is_empty+0x50/0x50 ? platform_get_irq_byname_optional+0x90/0x90 ? trace_device_pm_callback_start+0x82/0xd0 ? dpm_run_callback+0x49/0xc0
With the following RIP:
RIP: 0010:native_queued_spin_lock_slowpath+0x69/0x200
Since the fix to the recursion detection would allow a single recursion to happen while tracing, this lead to the trace_clock_global() taking a spin lock and then trying to take it again:
ring_buffer_lock_reserve() { trace_clock_global() { arch_spin_lock() { queued_spin_lock_slowpath() { /* lock taken */ (something else gets traced by function graph tracer) ring_buffer_lock_reserve() { trace_clock_global() { arch_spin_lock() { queued_spin_lock_slowpath() { /* DEAD LOCK! */
Tracing should *never* block, as it can lead to strange lockups like the above.
Restructure the trace_clock_global() code to instead of simply taking a lock to update the recorded "prev_time" simply use it, as two events happening on two different CPUs that calls this at the same time, really doesn't matter which one goes first. Use a trylock to grab the lock for updating the prev_time, and if it fails, simply try again the next time. If it failed to be taken, that means something else is already updating it.
Link: https://lkml.kernel.org/r/20210430121758.650b6e8a@gandalf.local.home
Cc: stable@vger.kernel.org Tested-by: Konstantin Kharlamov hi-angel@yandex.ru Tested-by: Todd Brandt todd.e.brandt@linux.intel.com Fixes: b02414c8f045 ("ring-buffer: Fix recursion protection transitions between interrupt context") # started showing the problem Fixes: 14131f2f98ac3 ("tracing: implement trace_clock_*() APIs") # where the bug happened Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=212761 Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/trace/trace_clock.c | 44 ++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 14 deletions(-)
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index aaf6793ededaa..c1637f90c8a38 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -95,33 +95,49 @@ u64 notrace trace_clock_global(void) { unsigned long flags; int this_cpu; - u64 now; + u64 now, prev_time;
raw_local_irq_save(flags);
this_cpu = raw_smp_processor_id(); - now = sched_clock_cpu(this_cpu); + /* - * If in an NMI context then dont risk lockups and return the - * cpu_clock() time: + * The global clock "guarantees" that the events are ordered + * between CPUs. But if two events on two different CPUS call + * trace_clock_global at roughly the same time, it really does + * not matter which one gets the earlier time. Just make sure + * that the same CPU will always show a monotonic clock. + * + * Use a read memory barrier to get the latest written + * time that was recorded. */ - if (unlikely(in_nmi())) - goto out; + smp_rmb(); + prev_time = READ_ONCE(trace_clock_struct.prev_time); + now = sched_clock_cpu(this_cpu);
- arch_spin_lock(&trace_clock_struct.lock); + /* Make sure that now is always greater than prev_time */ + if ((s64)(now - prev_time) < 0) + now = prev_time + 1;
/* - * TODO: if this happens often then maybe we should reset - * my_scd->clock to prev_time+1, to make sure - * we start ticking with the local clock from now on? + * If in an NMI context then dont risk lockups and simply return + * the current time. */ - if ((s64)(now - trace_clock_struct.prev_time) < 0) - now = trace_clock_struct.prev_time + 1; + if (unlikely(in_nmi())) + goto out;
- trace_clock_struct.prev_time = now; + /* Tracing can cause strange recursion, always use a try lock */ + if (arch_spin_trylock(&trace_clock_struct.lock)) { + /* Reread prev_time in case it was already updated */ + prev_time = READ_ONCE(trace_clock_struct.prev_time); + if ((s64)(now - prev_time) < 0) + now = prev_time + 1;
- arch_spin_unlock(&trace_clock_struct.lock); + trace_clock_struct.prev_time = now;
+ /* The unlock acts as the wmb for the above rmb */ + arch_spin_unlock(&trace_clock_struct.lock); + } out: raw_local_irq_restore(flags);
From: Joe Thornber ejt@redhat.com
stable inclusion from linux-4.19.191 commit 77aab16c6185f15fedccf72094d6d9a59dadadba
--------------------------------
commit a88b2358f1da2c9f9fcc432f2e0a79617fea397c upstream.
Otherwise most non-x86 architectures (e.g. riscv, arm) will resort to byte-by-byte access.
Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber ejt@redhat.com Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/persistent-data/dm-btree-internal.h | 4 ++-- drivers/md/persistent-data/dm-space-map-common.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index a240990a7f333..5673f8eb5f88f 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -34,12 +34,12 @@ struct node_header { __le32 max_entries; __le32 value_size; __le32 padding; -} __packed; +} __attribute__((packed, aligned(8)));
struct btree_node { struct node_header header; __le64 keys[0]; -} __packed; +} __attribute__((packed, aligned(8)));
/* diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index 8de63ce39bdd5..87e17909ef521 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -33,7 +33,7 @@ struct disk_index_entry { __le64 blocknr; __le32 nr_free; __le32 none_free_before; -} __packed; +} __attribute__ ((packed, aligned(8)));
#define MAX_METADATA_BITMAPS 255 @@ -43,7 +43,7 @@ struct disk_metadata_index { __le64 blocknr;
struct disk_index_entry index[MAX_METADATA_BITMAPS]; -} __packed; +} __attribute__ ((packed, aligned(8)));
struct ll_disk;
@@ -86,7 +86,7 @@ struct disk_sm_root { __le64 nr_allocated; __le64 bitmap_root; __le64 ref_count_root; -} __packed; +} __attribute__ ((packed, aligned(8)));
#define ENTRIES_PER_BYTE 4
@@ -94,7 +94,7 @@ struct disk_bitmap_header { __le32 csum; __le32 not_used; __le64 blocknr; -} __packed; +} __attribute__ ((packed, aligned(8)));
enum allocation_event { SM_NONE,
From: Joe Thornber ejt@redhat.com
stable inclusion from linux-4.19.191 commit 7cba7ebfd905cae6a50f548477a0e17ccd176ddf
--------------------------------
commit 5208692e80a1f3c8ce2063a22b675dd5589d1d80 upstream.
This division bug meant the search for free metadata space could skip the final allocation bitmap's worth of entries. Fix affects DM thinp, cache and era targets.
Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber ejt@redhat.com Tested-by: Ming-Hung Tsai mtsai@redhat.com Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/persistent-data/dm-space-map-common.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 17aef55ed708a..a284762e548e1 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -337,6 +337,8 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, */ begin = do_div(index_begin, ll->entries_per_block); end = do_div(end, ll->entries_per_block); + if (end == 0) + end = ll->entries_per_block;
for (i = index_begin; i < index_end; i++, begin = 0) { struct dm_block *blk;
From: Benjamin Block bblock@linux.ibm.com
stable inclusion from linux-4.19.191 commit 772b9f59657665af3b68d24d12b9d172d31f0dfb
--------------------------------
commit 8e947c8f4a5620df77e43c9c75310dc510250166 upstream.
When loading a device-mapper table for a request-based mapped device, and the allocation/initialization of the blk_mq_tag_set for the device fails, a following device remove will cause a double free.
E.g. (dmesg): device-mapper: core: Cannot initialize queue for request-based dm-mq mapped device device-mapper: ioctl: unable to set up device queue for new table. Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 0305e098835de000 TEID: 0305e098835de803 Fault in home space mode while using kernel ASCE. AS:000000025efe0007 R3:0000000000000024 Oops: 0038 ilc:3 [#1] SMP Modules linked in: ... lots of modules ... Supported: Yes, External CPU: 0 PID: 7348 Comm: multipathd Kdump: loaded Tainted: G W X 5.3.18-53-default #1 SLE15-SP3 Hardware name: IBM 8561 T01 7I2 (LPAR) Krnl PSW : 0704e00180000000 000000025e368eca (kfree+0x42/0x330) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 000000000000004a 000000025efe5230 c1773200d779968d 0000000000000000 000000025e520270 000000025e8d1b40 0000000000000003 00000007aae10000 000000025e5202a2 0000000000000001 c1773200d779968d 0305e098835de640 00000007a8170000 000003ff80138650 000000025e5202a2 000003e00396faa8 Krnl Code: 000000025e368eb8: c4180041e100 lgrl %r1,25eba50b8 000000025e368ebe: ecba06b93a55 risbg %r11,%r10,6,185,58 #000000025e368ec4: e3b010000008 ag %r11,0(%r1) >000000025e368eca: e310b0080004 lg %r1,8(%r11) 000000025e368ed0: a7110001 tmll %r1,1 000000025e368ed4: a7740129 brc 7,25e369126 000000025e368ed8: e320b0080004 lg %r2,8(%r11) 000000025e368ede: b904001b lgr %r1,%r11 Call Trace: [<000000025e368eca>] kfree+0x42/0x330 [<000000025e5202a2>] blk_mq_free_tag_set+0x72/0xb8 [<000003ff801316a8>] dm_mq_cleanup_mapped_device+0x38/0x50 [dm_mod] [<000003ff80120082>] free_dev+0x52/0xd0 [dm_mod] [<000003ff801233f0>] __dm_destroy+0x150/0x1d0 [dm_mod] [<000003ff8012bb9a>] dev_remove+0x162/0x1c0 [dm_mod] [<000003ff8012a988>] ctl_ioctl+0x198/0x478 [dm_mod] [<000003ff8012ac8a>] dm_ctl_ioctl+0x22/0x38 [dm_mod] [<000000025e3b11ee>] ksys_ioctl+0xbe/0xe0 [<000000025e3b127a>] __s390x_sys_ioctl+0x2a/0x40 [<000000025e8c15ac>] system_call+0xd8/0x2c8 Last Breaking-Event-Address: [<000000025e52029c>] blk_mq_free_tag_set+0x6c/0xb8 Kernel panic - not syncing: Fatal exception: panic_on_oops
When allocation/initialization of the blk_mq_tag_set fails in dm_mq_init_request_queue(), it is uninitialized/freed, but the pointer is not reset to NULL; so when dev_remove() later gets into dm_mq_cleanup_mapped_device() it sees the pointer and tries to uninitialize and free it again.
Fix this by setting the pointer to NULL in dm_mq_init_request_queue() error-handling. Also set it to NULL in dm_mq_cleanup_mapped_device().
Cc: stable@vger.kernel.org # 4.6+ Fixes: 1c357a1e86a4 ("dm: allocate blk_mq_tag_set rather than embed in mapped_device") Signed-off-by: Benjamin Block bblock@linux.ibm.com Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/dm-rq.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 4d36373e1c0f0..24d0b2ac07c5c 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -834,6 +834,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) blk_mq_free_tag_set(md->tag_set); out_kfree_tag_set: kfree(md->tag_set); + md->tag_set = NULL;
return err; } @@ -843,6 +844,7 @@ void dm_mq_cleanup_mapped_device(struct mapped_device *md) if (md->tag_set) { blk_mq_free_tag_set(md->tag_set); kfree(md->tag_set); + md->tag_set = NULL; } }
From: Sudhakar Panneerselvam sudhakar.panneerselvam@oracle.com
stable inclusion from linux-4.19.191 commit 945eec9f2aa1510f625b599cae66000f8b091211
--------------------------------
commit 404a8ef512587b2460107d3272c17a89aef75edf upstream.
NULL pointer dereference was observed in super_written() when it tries to access the mddev structure.
[The below stack trace is from an older kernel, but the problem described in this patch applies to the mainline kernel.]
[ 1194.474861] task: ffff8fdd20858000 task.stack: ffffb99d40790000 [ 1194.488000] RIP: 0010:super_written+0x29/0xe1 [ 1194.499688] RSP: 0018:ffff8ffb7fcc3c78 EFLAGS: 00010046 [ 1194.512477] RAX: 0000000000000000 RBX: ffff8ffb7bf4a000 RCX: ffff8ffb78991048 [ 1194.527325] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8ffb56b8a200 [ 1194.542576] RBP: ffff8ffb7fcc3c90 R08: 000000000000000b R09: 0000000000000000 [ 1194.558001] R10: ffff8ffb56b8a298 R11: 0000000000000000 R12: ffff8ffb56b8a200 [ 1194.573070] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 1194.588117] FS: 0000000000000000(0000) GS:ffff8ffb7fcc0000(0000) knlGS:0000000000000000 [ 1194.604264] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1194.617375] CR2: 00000000000002b8 CR3: 00000021e040a002 CR4: 00000000007606e0 [ 1194.632327] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1194.647865] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1194.663316] PKRU: 55555554 [ 1194.674090] Call Trace: [ 1194.683735] <IRQ> [ 1194.692948] bio_endio+0xae/0x135 [ 1194.703580] blk_update_request+0xad/0x2fa [ 1194.714990] blk_update_bidi_request+0x20/0x72 [ 1194.726578] __blk_end_bidi_request+0x2c/0x4d [ 1194.738373] __blk_end_request_all+0x31/0x49 [ 1194.749344] blk_flush_complete_seq+0x377/0x383 [ 1194.761550] flush_end_io+0x1dd/0x2a7 [ 1194.772910] blk_finish_request+0x9f/0x13c [ 1194.784544] scsi_end_request+0x180/0x25c [ 1194.796149] scsi_io_completion+0xc8/0x610 [ 1194.807503] scsi_finish_command+0xdc/0x125 [ 1194.818897] scsi_softirq_done+0x81/0xde [ 1194.830062] blk_done_softirq+0xa4/0xcc [ 1194.841008] __do_softirq+0xd9/0x29f [ 1194.851257] irq_exit+0xe6/0xeb [ 1194.861290] do_IRQ+0x59/0xe3 [ 1194.871060] common_interrupt+0x1c6/0x382 [ 1194.881988] </IRQ> [ 1194.890646] RIP: 0010:cpuidle_enter_state+0xdd/0x2a5 [ 1194.902532] RSP: 0018:ffffb99d40793e68 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff43 [ 1194.917317] RAX: ffff8ffb7fce27c0 RBX: ffff8ffb7fced800 RCX: 000000000000001f [ 1194.932056] RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000000 [ 1194.946428] RBP: ffffb99d40793ea0 R08: 0000000000000004 R09: 0000000000002ed2 [ 1194.960508] R10: 0000000000002664 R11: 0000000000000018 R12: 0000000000000003 [ 1194.974454] R13: 000000000000000b R14: ffffffff925715a0 R15: 0000011610120d5a [ 1194.988607] ? cpuidle_enter_state+0xcc/0x2a5 [ 1194.999077] cpuidle_enter+0x17/0x19 [ 1195.008395] call_cpuidle+0x23/0x3a [ 1195.017718] do_idle+0x172/0x1d5 [ 1195.026358] cpu_startup_entry+0x73/0x75 [ 1195.035769] start_secondary+0x1b9/0x20b [ 1195.044894] secondary_startup_64+0xa5/0xa5 [ 1195.084921] RIP: super_written+0x29/0xe1 RSP: ffff8ffb7fcc3c78 [ 1195.096354] CR2: 00000000000002b8
bio in the above stack is a bitmap write whose completion is invoked after the tear down sequence sets the mddev structure to NULL in rdev.
During tear down, there is an attempt to flush the bitmap writes, but for external bitmaps, there is no explicit wait for all the bitmap writes to complete. For instance, md_bitmap_flush() is called to flush the bitmap writes, but the last call to md_bitmap_daemon_work() in md_bitmap_flush() could generate new bitmap writes for which there is no explicit wait to complete those writes. The call to md_bitmap_update_sb() will return simply for external bitmaps and the follow-up call to md_update_sb() is conditional and may not get called for external bitmaps. This results in a kernel panic when the completion routine, super_written() is called which tries to reference mddev in the rdev that has been set to NULL(in unbind_rdev_from_array() by tear down sequence).
The solution is to call md_super_wait() for external bitmaps after the last call to md_bitmap_daemon_work() in md_bitmap_flush() to ensure there are no pending bitmap writes before proceeding with the tear down.
Cc: stable@vger.kernel.org Signed-off-by: Sudhakar Panneerselvam sudhakar.panneerselvam@oracle.com Reviewed-by: Zhao Heming heming.zhao@suse.com Signed-off-by: Song Liu song@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/md-bitmap.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 81631e17f4a04..f05417673bf2a 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1725,6 +1725,8 @@ void md_bitmap_flush(struct mddev *mddev) md_bitmap_daemon_work(mddev); bitmap->daemon_lastrun -= sleep; md_bitmap_daemon_work(mddev); + if (mddev->bitmap_info.external) + md_super_wait(mddev); md_bitmap_update_sb(bitmap); }
From: Heming Zhao heming.zhao@suse.com
stable inclusion from linux-4.19.191 commit 121243301effabf451294a4a77a582f3925dafe3
--------------------------------
commit f7c7a2f9a23e5b6e0f5251f29648d0238bb7757e upstream.
md_kick_rdev_from_array will remove rdev, so we should use rdev_for_each_safe to search list.
How to trigger:
env: Two nodes on kvm-qemu x86_64 VMs (2C2G with 2 iscsi luns).
``` node2=192.168.0.3
for i in {1..20}; do echo ==== $i `date` ====;
mdadm -Ss && ssh ${node2} "mdadm -Ss" wipefs -a /dev/sda /dev/sdb
mdadm -CR /dev/md0 -b clustered -e 1.2 -n 2 -l 1 /dev/sda \ /dev/sdb --assume-clean ssh ${node2} "mdadm -A /dev/md0 /dev/sda /dev/sdb" mdadm --wait /dev/md0 ssh ${node2} "mdadm --wait /dev/md0"
mdadm --manage /dev/md0 --fail /dev/sda --remove /dev/sda sleep 1 done ```
Crash stack:
``` stack segment: 0000 [#1] SMP ... ... RIP: 0010:md_check_recovery+0x1e8/0x570 [md_mod] ... ... RSP: 0018:ffffb149807a7d68 EFLAGS: 00010207 RAX: 0000000000000000 RBX: ffff9d494c180800 RCX: ffff9d490fc01e50 RDX: fffff047c0ed8308 RSI: 0000000000000246 RDI: 0000000000000246 RBP: 6b6b6b6b6b6b6b6b R08: ffff9d490fc01e40 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000000 R13: ffff9d494c180818 R14: ffff9d493399ef38 R15: ffff9d4933a1d800 FS: 0000000000000000(0000) GS:ffff9d494f700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe68cab9010 CR3: 000000004c6be001 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: raid1d+0x5c/0xd40 [raid1] ? finish_task_switch+0x75/0x2a0 ? lock_timer_base+0x67/0x80 ? try_to_del_timer_sync+0x4d/0x80 ? del_timer_sync+0x41/0x50 ? schedule_timeout+0x254/0x2d0 ? md_start_sync+0xe0/0xe0 [md_mod] ? md_thread+0x127/0x160 [md_mod] md_thread+0x127/0x160 [md_mod] ? wait_woken+0x80/0x80 kthread+0x10d/0x130 ? kthread_park+0xa0/0xa0 ret_from_fork+0x1f/0x40 ```
Fixes: dbb64f8635f5d ("md-cluster: Fix adding of new disk with new reload code") Fixes: 659b254fa7392 ("md-cluster: remove a disk asynchronously from cluster environment") Cc: stable@vger.kernel.org Reviewed-by: Gang He ghe@suse.com Signed-off-by: Heming Zhao heming.zhao@suse.com Signed-off-by: Song Liu song@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/md.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c index 013896b1d1ae0..2839b7bba0348 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8952,11 +8952,11 @@ void md_check_recovery(struct mddev *mddev) }
if (mddev_is_clustered(mddev)) { - struct md_rdev *rdev; + struct md_rdev *rdev, *tmp; /* kick the device if another node issued a * remove disk. */ - rdev_for_each(rdev, mddev) { + rdev_for_each_safe(rdev, tmp, mddev) { if (test_and_clear_bit(ClusterRemove, &rdev->flags) && rdev->raid_disk < 0) md_kick_rdev_from_array(rdev); @@ -9260,7 +9260,7 @@ static int __init md_init(void) static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) { struct mdp_superblock_1 *sb = page_address(rdev->sb_page); - struct md_rdev *rdev2; + struct md_rdev *rdev2, *tmp; int role, ret; char b[BDEVNAME_SIZE];
@@ -9277,7 +9277,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) }
/* Check for change of roles in the active devices */ - rdev_for_each(rdev2, mddev) { + rdev_for_each_safe(rdev2, tmp, mddev) { if (test_bit(Faulty, &rdev2->flags)) continue;
From: Christoph Hellwig hch@lst.de
stable inclusion from linux-4.19.191 commit a61234356fcce492179427e5540bdce2abaa0191
--------------------------------
commit 65aa97c4d2bfd76677c211b9d03ef05a98c6d68e upstream.
Split mddev_find into a simple mddev_find that just finds an existing mddev by the unit number, and a more complicated mddev_find that deals with find or allocating a mddev.
This turns out to fix this bug reported by Zhao Heming.
----------------------------- snip ------------------------------ commit d3374825ce57 ("md: make devices disappear when they are no longer needed.") introduced protection between mddev creating & removing. The md_open shouldn't create mddev when all_mddevs list doesn't contain mddev. With currently code logic, there will be very easy to trigger soft lockup in non-preempt env.
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/md.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c index 2839b7bba0348..9cf8ed292befe 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -585,6 +585,22 @@ void mddev_init(struct mddev *mddev) EXPORT_SYMBOL_GPL(mddev_init);
static struct mddev *mddev_find(dev_t unit) +{ + struct mddev *mddev; + + if (MAJOR(unit) != MD_MAJOR) + unit &= ~((1 << MdpMinorShift) - 1); + + spin_lock(&all_mddevs_lock); + mddev = mddev_find_locked(unit); + if (mddev) + mddev_get(mddev); + spin_unlock(&all_mddevs_lock); + + return mddev; +} + +static struct mddev *mddev_find_or_alloc(dev_t unit) { struct mddev *mddev, *new = NULL;
@@ -5368,7 +5384,7 @@ static int md_alloc(dev_t dev, char *name) * writing to /sys/module/md_mod/parameters/new_array. */ static DEFINE_MUTEX(disks_mutex); - struct mddev *mddev = mddev_find(dev); + struct mddev *mddev = mddev_find_or_alloc(dev); struct gendisk *disk; int partitioned; int shift; @@ -6228,11 +6244,9 @@ static void autorun_devices(int part)
md_probe(dev, NULL, NULL); mddev = mddev_find(dev); - if (!mddev || !mddev->gendisk) { - if (mddev) - mddev_put(mddev); + if (!mddev) break; - } + if (mddev_lock(mddev)) pr_warn("md: %s locked, cannot run\n", mdname(mddev)); else if (mddev->raid_disks || mddev->major_version
From: Christoph Hellwig hch@lst.de
stable inclusion from linux-4.19.191 commit 587241f748424a7532b1cd9536b2128a47ee54a1
--------------------------------
commit 8b57251f9a91f5e5a599de7549915d2d226cc3af upstream.
Factor out a self-contained helper to just lookup a mddev by the dev_t "unit".
Cc: stable@vger.kernel.org Reviewed-by: Heming Zhao heming.zhao@suse.com Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Song Liu song@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/md.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c index 9cf8ed292befe..60682ba9ae556 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -584,6 +584,17 @@ void mddev_init(struct mddev *mddev) } EXPORT_SYMBOL_GPL(mddev_init);
+static struct mddev *mddev_find_locked(dev_t unit) +{ + struct mddev *mddev; + + list_for_each_entry(mddev, &all_mddevs, all_mddevs) + if (mddev->unit == unit) + return mddev; + + return NULL; +} + static struct mddev *mddev_find(dev_t unit) { struct mddev *mddev; @@ -611,13 +622,13 @@ static struct mddev *mddev_find_or_alloc(dev_t unit) spin_lock(&all_mddevs_lock);
if (unit) { - list_for_each_entry(mddev, &all_mddevs, all_mddevs) - if (mddev->unit == unit) { - mddev_get(mddev); - spin_unlock(&all_mddevs_lock); - kfree(new); - return mddev; - } + mddev = mddev_find_locked(unit); + if (mddev) { + mddev_get(mddev); + spin_unlock(&all_mddevs_lock); + kfree(new); + return mddev; + }
if (new) { list_add(&new->all_mddevs, &all_mddevs); @@ -643,12 +654,7 @@ static struct mddev *mddev_find_or_alloc(dev_t unit) return NULL; }
- is_free = 1; - list_for_each_entry(mddev, &all_mddevs, all_mddevs) - if (mddev->unit == dev) { - is_free = 0; - break; - } + is_free = !mddev_find_locked(dev); } new->unit = dev; new->md_minor = MINOR(dev);
From: Zhao Heming heming.zhao@suse.com
stable inclusion from linux-4.19.191 commit 5db6e4c5c1284cec6720afe27f202a3dca697def
--------------------------------
commit 6a4db2a60306eb65bfb14ccc9fde035b74a4b4e7 upstream.
commit d3374825ce57 ("md: make devices disappear when they are no longer needed.") introduced protection between mddev creating & removing. The md_open shouldn't create mddev when all_mddevs list doesn't contain mddev. With currently code logic, there will be very easy to trigger soft lockup in non-preempt env.
This patch changes md_open returning from -ERESTARTSYS to -EBUSY, which will break the infinitely retry when md_open enter racing area.
This patch is partly fix soft lockup issue, full fix needs mddev_find is split into two functions: mddev_find & mddev_find_or_alloc. And md_open should call new mddev_find (it only does searching job).
For more detail, please refer with Christoph's "split mddev_find" patch in later commits.
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/md.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c index 60682ba9ae556..4045aa6103367 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7550,8 +7550,7 @@ static int md_open(struct block_device *bdev, fmode_t mode) /* Wait until bdev->bd_disk is definitely gone */ if (work_pending(&mddev->del_work)) flush_workqueue(md_misc_wq); - /* Then retry the open from the top */ - return -ERESTARTSYS; + return -EBUSY; } BUG_ON(mddev != bdev->bd_disk->private_data);
From: Jan Glauber jglauber@digitalocean.com
stable inclusion from linux-4.19.191 commit bc1355f10b2acddee56680f567edaa0e4a18db44
--------------------------------
commit 7abfabaf5f805f5171d133ce6af9b65ab766e76a upstream.
Reading /proc/mdstat with a read buffer size that would not fit the unused status line in the first read will skip this line from the output.
So 'dd if=/proc/mdstat bs=64 2>/dev/null' will not print something like: unused devices: <none>
Don't return NULL immediately in start() for v=2 but call show() once to print the status line also for multiple reads.
Cc: stable@vger.kernel.org Fixes: 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface") Signed-off-by: Jan Glauber jglauber@digitalocean.com Signed-off-by: Song Liu songliubraving@fb.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/md.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c index 4045aa6103367..c8f26788c3da5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7883,7 +7883,11 @@ static void *md_seq_start(struct seq_file *seq, loff_t *pos) loff_t l = *pos; struct mddev *mddev;
- if (l >= 0x10000) + if (l == 0x10000) { + ++*pos; + return (void *)2; + } + if (l > 0x10000) return NULL; if (!l--) /* header */
From: Sean Christopherson seanjc@google.com
stable inclusion from linux-4.19.191 commit bd1b3b2bd4dae57e5a2e06c340531137af633368
--------------------------------
commit b6b4fbd90b155a0025223df2c137af8a701d53b3 upstream.
Initialize MSR_TSC_AUX with CPU node information if RDTSCP or RDPID is supported. This fixes a bug where vdso_read_cpunode() will read garbage via RDPID if RDPID is supported but RDTSCP is not. While no known CPU supports RDPID but not RDTSCP, both Intel's SDM and AMD's APM allow for RDPID to exist without RDTSCP, e.g. it's technically a legal CPU model for a virtual machine.
Note, technically MSR_TSC_AUX could be initialized if and only if RDPID is supported since RDTSCP is currently not used to retrieve the CPU node. But, the cost of the superfluous WRMSR is negigible, whereas leaving MSR_TSC_AUX uninitialized is just asking for future breakage if someone decides to utilize RDTSCP.
Fixes: a582c540ac1b ("x86/vdso: Use RDPID in preference to LSL when available") Signed-off-by: Sean Christopherson seanjc@google.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210504225632.1532621-2-seanjc@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/entry/vdso/vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 5b8b556dbb12a..46caca4d9141b 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -343,7 +343,7 @@ static void vgetcpu_cpu_init(void *arg) #ifdef CONFIG_NUMA node = cpu_to_node(cpu); #endif - if (static_cpu_has(X86_FEATURE_RDTSCP)) + if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID)) write_rdtscp_aux((node << 12) | cpu);
/*
From: Dan Carpenter dan.carpenter@oracle.com
stable inclusion from linux-4.19.191 commit a42e385f0a4160709882b9435b874a7cdab24d46
--------------------------------
commit 7b279bbfd2b230c7a210ff8f405799c7e46bbf48 upstream.
Smatch complains about missing that the ovl_override_creds() doesn't have a matching revert_creds() if the dentry is disconnected. Fix this by moving the ovl_override_creds() until after the disconnected check.
Fixes: aa3ff3c152ff ("ovl: copy up of disconnected dentries") Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Miklos Szeredi mszeredi@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/overlayfs/copy_up.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index e164f489d01d9..30abafcd4ecce 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -824,7 +824,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, int ovl_copy_up_flags(struct dentry *dentry, int flags) { int err = 0; - const struct cred *old_cred = ovl_override_creds(dentry->d_sb); + const struct cred *old_cred; bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
/* @@ -835,6 +835,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) if (WARN_ON(disconnected && d_is_dir(dentry))) return -EIO;
+ old_cred = ovl_override_creds(dentry->d_sb); while (!err) { struct dentry *next; struct dentry *parent = NULL;
From: Otavio Pontes otavio.pontes@intel.com
stable inclusion from linux-4.19.191 commit af6d4954f9d43def013d3621d944ea2770bbd08b
--------------------------------
[ Upstream commit 7189b3c11903667808029ec9766a6e96de5012a5 ]
Currently, the late microcode loading mechanism checks whether any CPUs are offlined, and, in such a case, aborts the load attempt.
However, this must be done before the kernel caches new microcode from the filesystem. Otherwise, when offlined CPUs are onlined later, those cores are going to be updated through the CPU hotplug notifier callback with the new microcode, while CPUs previously onine will continue to run with the older microcode.
For example:
Turn off one core (2 threads):
echo 0 > /sys/devices/system/cpu/cpu3/online echo 0 > /sys/devices/system/cpu/cpu1/online
Install the ucode fails because a primary SMT thread is offline:
cp intel-ucode/06-8e-09 /lib/firmware/intel-ucode/ echo 1 > /sys/devices/system/cpu/microcode/reload bash: echo: write error: Invalid argument
Turn the core back on
echo 1 > /sys/devices/system/cpu/cpu3/online echo 1 > /sys/devices/system/cpu/cpu1/online cat /proc/cpuinfo |grep microcode microcode : 0x30 microcode : 0xde microcode : 0x30 microcode : 0xde
The rationale for why the update is aborted when at least one primary thread is offline is because even if that thread is soft-offlined and idle, it will still have to participate in broadcasted MCE's synchronization dance or enter SMM, and in both examples it will execute instructions so it better have the same microcode revision as the other cores.
[ bp: Heavily edit and extend commit message with the reasoning behind all this. ]
Fixes: 30ec26da9967 ("x86/microcode: Do not upload microcode if CPUs are offline") Signed-off-by: Otavio Pontes otavio.pontes@intel.com Signed-off-by: Borislav Petkov bp@suse.de Reviewed-by: Tony Luck tony.luck@intel.com Acked-by: Ashok Raj ashok.raj@intel.com Link: https://lkml.kernel.org/r/20210319165515.9240-2-otavio.pontes@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/kernel/cpu/microcode/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index a96091d44a459..eab4de387ce64 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -627,16 +627,16 @@ static ssize_t reload_store(struct device *dev, if (val != 1) return size;
- tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); - if (tmp_ret != UCODE_NEW) - return size; - get_online_cpus();
ret = check_online_cpus(); if (ret) goto put;
+ tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); + if (tmp_ret != UCODE_NEW) + goto put; + mutex_lock(µcode_mutex); ret = microcode_reload_late(); mutex_unlock(µcode_mutex);
From: Arnd Bergmann arnd@arndb.de
stable inclusion from linux-4.19.191 commit f1dfa84c8f26d6bf372df85837ae0d7fbbc127a9
--------------------------------
[ Upstream commit 8e13d96670a4c050d4883e6743a9e9858e5cfe10 ]
When building with extra warnings enabled, clang points out a mistake in the error handling:
drivers/irqchip/irq-gic-v3-mbi.c:306:21: error: result of comparison of constant 18446744073709551615 with expression of type 'phys_addr_t' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare] if (mbi_phys_base == OF_BAD_ADDR) {
Truncate the constant to the same type as the variable it gets compared to, to shut make the check work and void the warning.
Fixes: 505287525c24 ("irqchip/gic-v3: Add support for Message Based Interrupts as an MSI controller") Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Marc Zyngier maz@kernel.org Link: https://lore.kernel.org/r/20210323131842.2773094-1-arnd@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/irqchip/irq-gic-v3-mbi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c index fbfa7ff6deb16..9d011281d4b5c 100644 --- a/drivers/irqchip/irq-gic-v3-mbi.c +++ b/drivers/irqchip/irq-gic-v3-mbi.c @@ -297,7 +297,7 @@ int __init mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent) reg = of_get_property(np, "mbi-alias", NULL); if (reg) { mbi_phys_base = of_translate_address(np, reg); - if (mbi_phys_base == OF_BAD_ADDR) { + if (mbi_phys_base == (phys_addr_t)OF_BAD_ADDR) { ret = -ENXIO; goto err_free_mbi; }
From: Nathan Chancellor nathan@kernel.org
stable inclusion from linux-4.19.191 commit 443a6fdcd74be7c023e48c8fd5b589a661de7fe3
--------------------------------
[ Upstream commit 2bc6262c6117dd18106d5aa50d53e945b5d99c51 ]
All of the CPPC sysfs show functions are called via indirect call in kobj_attr_show(), where they should be of type
ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, char *buf);
because that is the type of the ->show() member in 'struct kobj_attribute' but they are actually of type
ssize_t (*show)(struct kobject *kobj, struct attribute *attr, char *buf);
because of the ->show() member in 'struct cppc_attr', resulting in a Control Flow Integrity violation [1].
$ cat /sys/devices/system/cpu/cpu0/acpi_cppc/highest_perf 3400
$ dmesg | grep "CFI failure" [ 175.970559] CFI failure (target: show_highest_perf+0x0/0x8):
As far as I can tell, the only difference between 'struct cppc_attr' and 'struct kobj_attribute' aside from the type of the attr parameter is the type of the count parameter in the ->store() member (ssize_t vs. size_t), which does not actually matter because all of these nodes are read-only.
Eliminate 'struct cppc_attr' in favor of 'struct kobj_attribute' to fix the violation.
[1]: https://lore.kernel.org/r/20210401233216.2540591-1-samitolvanen@google.com/
Fixes: 158c998ea44b ("ACPI / CPPC: add sysfs support to compute delivered performance") Link: https://github.com/ClangBuiltLinux/linux/issues/1343 Signed-off-by: Nathan Chancellor nathan@kernel.org Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/acpi/cppc_acpi.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-)
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 13d64dd4cfe0f..6134f20a13f0c 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -122,23 +122,15 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr); */ #define NUM_RETRIES 500ULL
-struct cppc_attr { - struct attribute attr; - ssize_t (*show)(struct kobject *kobj, - struct attribute *attr, char *buf); - ssize_t (*store)(struct kobject *kobj, - struct attribute *attr, const char *c, ssize_t count); -}; - #define define_one_cppc_ro(_name) \ -static struct cppc_attr _name = \ +static struct kobj_attribute _name = \ __ATTR(_name, 0444, show_##_name, NULL)
#define to_cpc_desc(a) container_of(a, struct cpc_desc, kobj)
#define show_cppc_data(access_fn, struct_name, member_name) \ static ssize_t show_##member_name(struct kobject *kobj, \ - struct attribute *attr, char *buf) \ + struct kobj_attribute *attr, char *buf) \ { \ struct cpc_desc *cpc_ptr = to_cpc_desc(kobj); \ struct struct_name st_name = {0}; \ @@ -164,7 +156,7 @@ show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, reference_perf); show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time);
static ssize_t show_feedback_ctrs(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { struct cpc_desc *cpc_ptr = to_cpc_desc(kobj); struct cppc_perf_fb_ctrs fb_ctrs = {0};
From: Masami Hiramatsu mhiramat@kernel.org
stable inclusion from linux-4.19.191 commit 609a2d6557e8a6d5dbb6bfcfc5b42185526a0c0b
--------------------------------
[ Upstream commit 6dd3b8c9f58816a1354be39559f630cd1bd12159 ]
There are 2 bugs in the can_boost() function because of using x86 insn decoder. Since the insn->opcode never has a prefix byte, it can not find CS override prefix in it. And the insn->attr is the attribute of the opcode, thus inat_is_address_size_prefix( insn->attr) always returns false.
Fix those by checking each prefix bytes with for_each_insn_prefix loop and getting the correct attribute for each prefix byte. Also, this removes unlikely, because this is a slow path.
Fixes: a8d11cd0714f ("kprobes/x86: Consolidate insn decoder users for copying code") Signed-off-by: Masami Hiramatsu mhiramat@kernel.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lore.kernel.org/r/161666691162.1120877.2808435205294352583.stgit@dev... Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/kernel/kprobes/core.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 2bb9a4c862b1a..d04f670f52629 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -170,6 +170,8 @@ NOKPROBE_SYMBOL(skip_prefixes); int can_boost(struct insn *insn, void *addr) { kprobe_opcode_t opcode; + insn_byte_t prefix; + int i;
if (search_exception_tables((unsigned long)addr)) return 0; /* Page fault may occur on this address. */ @@ -182,9 +184,14 @@ int can_boost(struct insn *insn, void *addr) if (insn->opcode.nbytes != 1) return 0;
- /* Can't boost Address-size override prefix */ - if (unlikely(inat_is_address_size_prefix(insn->attr))) - return 0; + for_each_insn_prefix(insn, i, prefix) { + insn_attr_t attr; + + attr = inat_get_opcode_attribute(prefix); + /* Can't boost Address-size override prefix and CS override prefix */ + if (prefix == 0x2e || inat_is_address_size_prefix(attr)) + return 0; + }
opcode = insn->opcode.bytes[0];
@@ -209,8 +216,8 @@ int can_boost(struct insn *insn, void *addr) /* clear and set flags are boostable */ return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); default: - /* CS override prefix and call are not boostable */ - return (opcode != 0x2e && opcode != 0x9a); + /* call is not boostable */ + return opcode != 0x9a; } }
From: Sergey Shtylyov s.shtylyov@omprussia.ru
stable inclusion from linux-4.19.191 commit 77b2bc239cc33ae053a72152f2d4a438ef5836fc
--------------------------------
[ Upstream commit b30d0040f06159de97ad9c0b1536f47250719d7d ]
Iff platform_get_irq() returns 0, ahci_platform_init_host() would return 0 early (as if the call was successful). Override IRQ0 with -EINVAL instead as the 'libata' regards 0 as "no IRQ" (thus polling) anyway...
Fixes: c034640a32f8 ("ata: libahci: properly propagate return value of platform_get_irq()") Signed-off-by: Sergey Shtylyov s.shtylyov@omprussia.ru Link: https://lore.kernel.org/r/4448c8cc-331f-2915-0e17-38ea34e251c8@omprussia.ru Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/ata/libahci_platform.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 522b543f718d8..6a55aac0c60fc 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -544,11 +544,13 @@ int ahci_platform_init_host(struct platform_device *pdev, int i, irq, n_ports, rc;
irq = platform_get_irq(pdev, 0); - if (irq <= 0) { + if (irq < 0) { if (irq != -EPROBE_DEFER) dev_err(dev, "no irq\n"); return irq; } + if (!irq) + return -EINVAL;
hpriv->irq = irq;
From: Jason Gunthorpe jgg@nvidia.com
stable inclusion from linux-4.19.191 commit 6af3de26ee2b42ff5104a0a0f048df82e37fbc82
--------------------------------
[ Upstream commit b5a1f8921d5040bb788492bf33a66758021e4be5 ]
There is a small race where the parent is NULL even though the kobj has already been made visible in sysfs.
For instance the attribute_group is made visible in sysfs_create_files() and the mdev_type_attr_show() does:
ret = attr->show(kobj, type->parent->dev, buf);
Which will crash on NULL parent. Move the parent setup to before the type pointer leaves the stack frame.
Fixes: 7b96953bc640 ("vfio: Mediated device Core driver") Reviewed-by: Christoph Hellwig hch@lst.de Reviewed-by: Kevin Tian kevin.tian@intel.com Reviewed-by: Max Gurtovoy mgurtovoy@nvidia.com Reviewed-by: Cornelia Huck cohuck@redhat.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com Message-Id: 2-v2-d36939638fc6+d54-vfio2_jgg@nvidia.com Signed-off-by: Alex Williamson alex.williamson@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/vfio/mdev/mdev_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c index 1692a0cc30360..c99fcc6c2eba8 100644 --- a/drivers/vfio/mdev/mdev_sysfs.c +++ b/drivers/vfio/mdev/mdev_sysfs.c @@ -108,6 +108,7 @@ struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent, return ERR_PTR(-ENOMEM);
type->kobj.kset = parent->mdev_types_kset; + type->parent = parent;
ret = kobject_init_and_add(&type->kobj, &mdev_type_ktype, NULL, "%s-%s", dev_driver_string(parent->dev), @@ -135,7 +136,6 @@ struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent, }
type->group = group; - type->parent = parent; return type;
attrs_failed:
From: Nathan Chancellor nathan@kernel.org
stable inclusion from linux-4.19.191 commit 8bfc7a72ac0e2e0290282cb6c647291ab2e16ff5
--------------------------------
[ Upstream commit de5bc7b425d4c27ae5faa00ea7eb6b9780b9a355 ]
dev_attr_show() calls _iommu_event_show() via an indirect call but _iommu_event_show()'s type does not currently match the type of the show() member in 'struct device_attribute', resulting in a Control Flow Integrity violation.
$ cat /sys/devices/amd_iommu_1/events/mem_dte_hit csource=0x0a
$ dmesg | grep "CFI failure" [ 3526.735140] CFI failure (target: _iommu_event_show...):
Change _iommu_event_show() and 'struct amd_iommu_event_desc' to 'struct device_attribute' so that there is no more CFI violation.
Fixes: 7be6296fdd75 ("perf/x86/amd: AMD IOMMU Performance Counter PERF uncore PMU implementation") Signed-off-by: Nathan Chancellor nathan@kernel.org Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Link: https://lkml.kernel.org/r/20210415001112.3024673-1-nathan@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/events/amd/iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 3210fee27e7f9..55f37973f9f87 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -84,12 +84,12 @@ static struct attribute_group amd_iommu_events_group = { };
struct amd_iommu_event_desc { - struct kobj_attribute attr; + struct device_attribute attr; const char *event; };
-static ssize_t _iommu_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t _iommu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct amd_iommu_event_desc *event = container_of(attr, struct amd_iommu_event_desc, attr);
From: Waiman Long longman@redhat.com
stable inclusion from linux-4.19.191 commit 302d674cfacd2a89ba16beb973c757cb977e32a0
--------------------------------
[ Upstream commit ad789f84c9a145f8a18744c0387cec22ec51651e ]
The handling of sysrq key can be activated by echoing the key to /proc/sysrq-trigger or via the magic key sequence typed into a terminal that is connected to the system in some way (serial, USB or other mean). In the former case, the handling is done in a user context. In the latter case, it is likely to be in an interrupt context.
Currently in print_cpu() of kernel/sched/debug.c, sched_debug_lock is taken with interrupt disabled for the whole duration of the calls to print_*_stats() and print_rq() which could last for the quite some time if the information dump happens on the serial console.
If the system has many cpus and the sched_debug_lock is somehow busy (e.g. parallel sysrq-t), the system may hit a hard lockup panic depending on the actually serial console implementation of the system.
The purpose of sched_debug_lock is to serialize the use of the global cgroup_path[] buffer in print_cpu(). The rests of the printk calls don't need serialization from sched_debug_lock.
Calling printk() with interrupt disabled can still be problematic if multiple instances are running. Allocating a stack buffer of PATH_MAX bytes is not feasible because of the limited size of the kernel stack.
The solution implemented in this patch is to allow only one caller at a time to use the full size group_path[], while other simultaneous callers will have to use shorter stack buffers with the possibility of path name truncation. A "..." suffix will be printed if truncation may have happened. The cgroup path name is provided for informational purpose only, so occasional path name truncation should not be a big problem.
Fixes: efe25c2c7b3a ("sched: Reinstate group names in /proc/sched_debug") Suggested-by: Peter Zijlstra peterz@infradead.org Signed-off-by: Waiman Long longman@redhat.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Link: https://lkml.kernel.org/r/20210415195426.6677-1-longman@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/sched/debug.c | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index f12382877390c..3353a2f936f33 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -11,8 +11,6 @@ */ #include "sched.h"
-static DEFINE_SPINLOCK(sched_debug_lock); - /* * This allows printing both to /proc/sched_debug and * to the console @@ -434,16 +432,37 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group #endif
#ifdef CONFIG_CGROUP_SCHED +static DEFINE_SPINLOCK(sched_debug_lock); static char group_path[PATH_MAX];
-static char *task_group_path(struct task_group *tg) +static void task_group_path(struct task_group *tg, char *path, int plen) { - if (autogroup_path(tg, group_path, PATH_MAX)) - return group_path; + if (autogroup_path(tg, path, plen)) + return;
- cgroup_path(tg->css.cgroup, group_path, PATH_MAX); + cgroup_path(tg->css.cgroup, path, plen); +}
- return group_path; +/* + * Only 1 SEQ_printf_task_group_path() caller can use the full length + * group_path[] for cgroup path. Other simultaneous callers will have + * to use a shorter stack buffer. A "..." suffix is appended at the end + * of the stack buffer so that it will show up in case the output length + * matches the given buffer size to indicate possible path name truncation. + */ +#define SEQ_printf_task_group_path(m, tg, fmt...) \ +{ \ + if (spin_trylock(&sched_debug_lock)) { \ + task_group_path(tg, group_path, sizeof(group_path)); \ + SEQ_printf(m, fmt, group_path); \ + spin_unlock(&sched_debug_lock); \ + } else { \ + char buf[128]; \ + char *bufend = buf + sizeof(buf) - 3; \ + task_group_path(tg, buf, bufend - buf); \ + strcpy(bufend - 1, "..."); \ + SEQ_printf(m, fmt, buf); \ + } \ } #endif
@@ -470,7 +489,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif #ifdef CONFIG_CGROUP_SCHED - SEQ_printf(m, " %s", task_group_path(task_group(p))); + SEQ_printf_task_group_path(m, task_group(p), " %s") #endif
SEQ_printf(m, "\n"); @@ -507,7 +526,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#ifdef CONFIG_FAIR_GROUP_SCHED SEQ_printf(m, "\n"); - SEQ_printf(m, "cfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg)); + SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu); #else SEQ_printf(m, "\n"); SEQ_printf(m, "cfs_rq[%d]:\n", cpu); @@ -579,7 +598,7 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) { #ifdef CONFIG_RT_GROUP_SCHED SEQ_printf(m, "\n"); - SEQ_printf(m, "rt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg)); + SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu); #else SEQ_printf(m, "\n"); SEQ_printf(m, "rt_rq[%d]:\n", cpu); @@ -631,7 +650,6 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq) static void print_cpu(struct seq_file *m, int cpu) { struct rq *rq = cpu_rq(cpu); - unsigned long flags;
#ifdef CONFIG_X86 { @@ -690,13 +708,11 @@ do { \ } #undef P
- spin_lock_irqsave(&sched_debug_lock, flags); print_cfs_stats(m, cpu); print_rt_stats(m, cpu); print_dl_stats(m, cpu);
print_rq(m, rq, cpu); - spin_unlock_irqrestore(&sched_debug_lock, flags); SEQ_printf(m, "\n"); }
From: Lv Yunlong lyl2019@mail.ustc.edu.cn
stable inclusion from linux-4.19.191 commit 7585643856ae82e1ed105b261d4e57b384648a69
--------------------------------
[ Upstream commit 72ce11ddfa4e9e1879103581a60b7e34547eaa0a ]
In null_init, null_add_dev(dev) is called. In null_add_dev, it calls null_free_zoned_dev(dev) to free dev->zones via kvfree(dev->zones) in out_cleanup_zone branch and returns err. Then null_init accept the err code and then calls null_free_dev(dev).
But in null_free_dev(dev), dev->zones is freed again by null_free_zoned_dev().
My patch set dev->zones to NULL in null_free_zoned_dev() after kvfree(dev->zones) is called, to avoid the double free.
Fixes: 2984c8684f962 ("nullb: factor disk parameters") Signed-off-by: Lv Yunlong lyl2019@mail.ustc.edu.cn Link: https://lore.kernel.org/r/20210426143229.7374-1-lyl2019@mail.ustc.edu.cn Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/block/null_blk_zoned.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index 1a74871f1bfdd..f64e9122824b4 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -52,6 +52,7 @@ int null_zone_init(struct nullb_device *dev) void null_zone_exit(struct nullb_device *dev) { kvfree(dev->zones); + dev->zones = NULL; }
static void null_zone_fill_bio(struct nullb_device *dev, struct bio *bio,
From: Wang Wensheng wangwensheng4@huawei.com
stable inclusion from linux-4.19.191 commit c75dd20796f4504eb55e60c4b0d90221cedb8dbe
--------------------------------
[ Upstream commit 2284f47fe9fe2ed2ef619e5474e155cfeeebd569 ]
sparse_buffer_init() and sparse_buffer_fini() should appear in pair, or a WARN issue would be through the next time sparse_buffer_init() runs.
Add the missing sparse_buffer_fini() in error branch.
Link: https://lkml.kernel.org/r/20210325113155.118574-1-wangwensheng4@huawei.com Fixes: 85c77f791390 ("mm/sparse: add new sparse_init_nid() and sparse_init()") Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: David Hildenbrand david@redhat.com Reviewed-by: Oscar Salvador osalvador@suse.de Cc: Pavel Tatashin pasha.tatashin@oracle.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/sparse.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/mm/sparse.c b/mm/sparse.c index 9854aff6b4193..62ae3880a9add 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -477,6 +477,7 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.", __func__, nid); pnum_begin = pnum; + sparse_buffer_fini(); goto failed; } check_usemap_section_nr(nid, usemap);
From: Jane Chu jane.chu@oracle.com
stable inclusion from linux-4.19.191 commit e816fbc72270850317505ca8d6d80d652345310f
--------------------------------
[ Upstream commit 4d75136be8bf3ae01b0bc3e725b2cdc921e103bd ]
It appears that unmap_mapping_range() actually takes a 'size' as its third argument rather than a location, the current calling fashion causes unnecessary amount of unmapping to occur.
Link: https://lkml.kernel.org/r/20210420002821.2749748-1-jane.chu@oracle.com Fixes: 6100e34b2526e ("mm, memory_failure: Teach memory_failure() about dev_pagemap pages") Signed-off-by: Jane Chu jane.chu@oracle.com Reviewed-by: Dan Williams dan.j.williams@intel.com Reviewed-by: Naoya Horiguchi naoya.horiguchi@nec.com Cc: Dave Jiang dave.jiang@intel.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index d15ffccb2db40..6fc0695f70e7c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1220,7 +1220,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, * communicated in siginfo, see kill_proc() */ start = (page->index << PAGE_SHIFT) & ~(size - 1); - unmap_mapping_range(page->mapping, start, start + size, 0); + unmap_mapping_range(page->mapping, start, size, 0); } kill_procs(&tokill, flags & MF_MUST_KILL, !unmap_success, pfn, flags); rc = 0;
From: Jonathon Reinhart jonathon.reinhart@gmail.com
stable inclusion from linux-4.19.191 commit 992de06308d9a9584d59b96d294ac676f924e437
--------------------------------
commit 8d432592f30fcc34ef5a10aac4887b4897884493 upstream.
tcp_set_default_congestion_control() is netns-safe in that it writes to &net->ipv4.tcp_congestion_control, but it also sets ca->flags |= TCP_CONG_NON_RESTRICTED which is not namespaced. This has the unintended side-effect of changing the global net.ipv4.tcp_allowed_congestion_control sysctl, despite the fact that it is read-only: 97684f0970f6 ("net: Make tcp_allowed_congestion_control readonly in non-init netns")
Resolve this netns "leak" by only allowing the init netns to set the default algorithm to one that is restricted. This restriction could be removed if tcp_allowed_congestion_control were namespace-ified in the future.
This bug was uncovered with https://github.com/JonathonReinhart/linux-netns-sysctl-verify
Fixes: 6670e1524477 ("tcp: Namespace-ify sysctl_tcp_default_congestion_control") Signed-off-by: Jonathon Reinhart jonathon.reinhart@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv4/tcp_cong.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 00a7482b6fbd9..533f8d84d2f71 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -228,6 +228,10 @@ int tcp_set_default_congestion_control(struct net *net, const char *name) ret = -ENOENT; } else if (!try_module_get(ca->owner)) { ret = -EBUSY; + } else if (!net_eq(net, &init_net) && + !(ca->flags & TCP_CONG_NON_RESTRICTED)) { + /* Only init netns can set default to a restricted algorithm */ + ret = -EPERM; } else { prev = xchg(&net->ipv4.tcp_congestion_control, ca); if (prev)
From: Arnd Bergmann arnd@arndb.de
stable inclusion from linux-4.19.191 commit db0517ac659e0ac61b916cffc29564cf3ab58b0d
--------------------------------
commit 1139aeb1c521eb4a050920ce6c64c36c4f2a3ab7 upstream.
As of commit 966a967116e6 ("smp: Avoid using two cache lines for struct call_single_data"), the smp code prefers 32-byte aligned call_single_data objects for performance reasons, but the block layer includes an instance of this structure in the main 'struct request' that is more senstive to size than to performance here, see 4ccafe032005 ("block: unalign call_single_data in struct request").
The result is a violation of the calling conventions that clang correctly points out:
block/blk-mq.c:630:39: warning: passing 8-byte aligned argument to 32-byte aligned parameter 2 of 'smp_call_function_single_async' may result in an unaligned pointer access [-Walign-mismatch] smp_call_function_single_async(cpu, &rq->csd);
It does seem that the usage of the call_single_data without cache line alignment should still be allowed by the smp code, so just change the function prototype so it accepts both, but leave the default alignment unchanged for the other users. This seems better to me than adding a local hack to shut up an otherwise correct warning in the caller.
Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Acked-by: Jens Axboe axboe@kernel.dk Link: https://lkml.kernel.org/r/20210505211300.3174456-1-arnd@kernel.org [nc: Fix conflicts] Signed-off-by: Nathan Chancellor nathan@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/smp.h | 2 +- kernel/smp.c | 10 +++++----- kernel/up.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/include/linux/smp.h b/include/linux/smp.h index 9fb239e12b824..6bb7f07bc1dd2 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -53,7 +53,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags);
-int smp_call_function_single_async(int cpu, call_single_data_t *csd); +int smp_call_function_single_async(int cpu, struct __call_single_data *csd);
#ifdef CONFIG_SMP
diff --git a/kernel/smp.c b/kernel/smp.c index 084c8b3a26812..00d208ef07c76 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -103,12 +103,12 @@ void __init call_function_init(void) * previous function call. For multi-cpu calls its even more interesting * as we'll have to ensure no other cpu is observing our csd. */ -static __always_inline void csd_lock_wait(call_single_data_t *csd) +static __always_inline void csd_lock_wait(struct __call_single_data *csd) { smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); }
-static __always_inline void csd_lock(call_single_data_t *csd) +static __always_inline void csd_lock(struct __call_single_data *csd) { csd_lock_wait(csd); csd->flags |= CSD_FLAG_LOCK; @@ -121,7 +121,7 @@ static __always_inline void csd_lock(call_single_data_t *csd) smp_wmb(); }
-static __always_inline void csd_unlock(call_single_data_t *csd) +static __always_inline void csd_unlock(struct __call_single_data *csd) { WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
@@ -138,7 +138,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); * for execution on the given CPU. data must already have * ->func, ->info, and ->flags set. */ -static int generic_exec_single(int cpu, call_single_data_t *csd, +static int generic_exec_single(int cpu, struct __call_single_data *csd, smp_call_func_t func, void *info) { if (cpu == smp_processor_id()) { @@ -323,7 +323,7 @@ EXPORT_SYMBOL(smp_call_function_single); * NOTE: Be careful, there is unfortunately no current debugging facility to * validate the correctness of this serialization. */ -int smp_call_function_single_async(int cpu, call_single_data_t *csd) +int smp_call_function_single_async(int cpu, struct __call_single_data *csd) { int err = 0;
diff --git a/kernel/up.c b/kernel/up.c index 42c46bf3e0a5c..2080f75e0e65c 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -23,7 +23,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single);
-int smp_call_function_single_async(int cpu, call_single_data_t *csd) +int smp_call_function_single_async(int cpu, struct __call_single_data *csd) { unsigned long flags;
From: Xin Long lucien.xin@gmail.com
stable inclusion from linux-4.19.191 commit c61ecd2e8e70ebdb03ee6b54a261d907f1664b43
--------------------------------
commit 01bfe5e8e428b475982a98a46cca5755726f3f7f upstream.
This reverts commit b166a20b07382b8bc1dcee2a448715c9c2c81b5b.
This one has to be reverted as it introduced a dead lock, as syzbot reported:
CPU0 CPU1 ---- ---- lock(&net->sctp.addr_wq_lock); lock(slock-AF_INET6); lock(&net->sctp.addr_wq_lock); lock(slock-AF_INET6);
CPU0 is the thread of sctp_addr_wq_timeout_handler(), and CPU1 is that of sctp_close().
The original issue this commit fixed will be fixed in the next patch.
Reported-by: syzbot+959223586843e69a2674@syzkaller.appspotmail.com Signed-off-by: Xin Long lucien.xin@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/sctp/socket.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index addfc8feaef57..733c3f4480087 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1571,9 +1571,11 @@ static void sctp_close(struct sock *sk, long timeout)
/* Supposedly, no process has access to the socket, but * the net layers still may. + * Also, sctp_destroy_sock() needs to be called with addr_wq_lock + * held and that should be grabbed before socket lock. */ - local_bh_disable(); - bh_lock_sock(sk); + spin_lock_bh(&net->sctp.addr_wq_lock); + bh_lock_sock_nested(sk);
/* Hold the sock, since sk_common_release() will put sock_put() * and we have just a little more cleanup. @@ -1582,7 +1584,7 @@ static void sctp_close(struct sock *sk, long timeout) sk_common_release(sk);
bh_unlock_sock(sk); - local_bh_enable(); + spin_unlock_bh(&net->sctp.addr_wq_lock);
sock_put(sk);
@@ -4777,6 +4779,9 @@ static int sctp_init_sock(struct sock *sk) sk_sockets_allocated_inc(sk); sock_prot_inuse_add(net, sk->sk_prot, 1);
+ /* Nothing can fail after this block, otherwise + * sctp_destroy_sock() will be called without addr_wq_lock held + */ if (net->sctp.default_auto_asconf) { spin_lock(&sock_net(sk)->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, @@ -4811,9 +4816,7 @@ static void sctp_destroy_sock(struct sock *sk)
if (sp->do_auto_asconf) { sp->do_auto_asconf = 0; - spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock); list_del(&sp->auto_asconf_list); - spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock); } sctp_endpoint_free(sp->ep); local_bh_disable();
From: Xin Long lucien.xin@gmail.com
stable inclusion from linux-4.19.191 commit 59339c866e0428fb92bfb3f5290c49a5325d2494
--------------------------------
commit 34e5b01186858b36c4d7c87e1a025071e8e2401f upstream.
As Or Cohen described:
If sctp_destroy_sock is called without sock_net(sk)->sctp.addr_wq_lock held and sp->do_auto_asconf is true, then an element is removed from the auto_asconf_splist without any proper locking.
This can happen in the following functions: 1. In sctp_accept, if sctp_sock_migrate fails. 2. In inet_create or inet6_create, if there is a bpf program attached to BPF_CGROUP_INET_SOCK_CREATE which denies creation of the sctp socket.
This patch is to fix it by moving the auto_asconf init out of sctp_init_sock(), by which inet_create()/inet6_create() won't need to operate it in sctp_destroy_sock() when calling sk_common_release().
It also makes more sense to do auto_asconf init while binding the first addr, as auto_asconf actually requires an ANY addr bind, see it in sctp_addr_wq_timeout_handler().
This addresses CVE-2021-23133.
Fixes: 610236587600 ("bpf: Add new cgroup attach type to enable sock modifications") Reported-by: Or Cohen orcohen@paloaltonetworks.com Signed-off-by: Xin Long lucien.xin@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/sctp/socket.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 733c3f4480087..cfa54e56add92 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -375,6 +375,18 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, return af; }
+static void sctp_auto_asconf_init(struct sctp_sock *sp) +{ + struct net *net = sock_net(&sp->inet.sk); + + if (net->sctp.default_auto_asconf) { + spin_lock(&net->sctp.addr_wq_lock); + list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist); + spin_unlock(&net->sctp.addr_wq_lock); + sp->do_auto_asconf = 1; + } +} + /* Bind a local address either to an endpoint or to an association. */ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) { @@ -437,8 +449,10 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) }
/* Refresh ephemeral port. */ - if (!bp->port) + if (!bp->port) { bp->port = inet_sk(sk)->inet_num; + sctp_auto_asconf_init(sp); + }
/* Add the address to the bind address list. * Use GFP_ATOMIC since BHs will be disabled. @@ -4779,19 +4793,6 @@ static int sctp_init_sock(struct sock *sk) sk_sockets_allocated_inc(sk); sock_prot_inuse_add(net, sk->sk_prot, 1);
- /* Nothing can fail after this block, otherwise - * sctp_destroy_sock() will be called without addr_wq_lock held - */ - if (net->sctp.default_auto_asconf) { - spin_lock(&sock_net(sk)->sctp.addr_wq_lock); - list_add_tail(&sp->auto_asconf_list, - &net->sctp.auto_asconf_splist); - sp->do_auto_asconf = 1; - spin_unlock(&sock_net(sk)->sctp.addr_wq_lock); - } else { - sp->do_auto_asconf = 0; - } - local_bh_enable();
return 0; @@ -8851,6 +8852,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_bind_addr_dup(&newsp->ep->base.bind_addr, &oldsp->ep->base.bind_addr, GFP_KERNEL);
+ sctp_auto_asconf_init(newsp); + /* Move any messages in the old socket's receive queue that are for the * peeled off association to the new socket's receive queue. */
From: Zhen Lei thunder.leizhen@huawei.com
stable inclusion from linux-4.19.191 commit f9361d826c874142b3a1a5f1e8366d3d046ce89c
--------------------------------
commit 1df83992d977355177810c2b711afc30546c81ce upstream.
If the total number of commands queried through TPM2_CAP_COMMANDS is different from that queried through TPM2_CC_GET_CAPABILITY, it indicates an unknown error. In this case, an appropriate error code -EFAULT should be returned. However, we currently do not explicitly assign this error code to 'rc'. As a result, 0 was incorrectly returned.
Cc: stable@vger.kernel.org Fixes: 58472f5cd4f6("tpm: validate TPM 2.0 commands") Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Jarkko Sakkinen jarkko@kernel.org Signed-off-by: Jarkko Sakkinen jarkko@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/char/tpm/tpm2-cmd.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 73593855b2c98..fcf07452ec1aa 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -960,6 +960,7 @@ static int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip)
if (nr_commands != be32_to_cpup((__be32 *)&buf.data[TPM_HEADER_SIZE + 5])) { + rc = -EFAULT; tpm_buf_destroy(&buf); goto out; }
From: Eric Dumazet edumazet@google.com
stable inclusion from linux-4.19.191 commit dd591526bca9836b7815710888cfc7685eaf2c76
--------------------------------
[ Upstream commit 40cb881b5aaa0b69a7d93dec8440d5c62dae299f ]
After adopting CONFIG_PCPU_DEV_REFCNT=n option, syzbot was able to trigger a warning [1]
Issue here is that:
- all dev_put() should be paired with a corresponding prior dev_hold().
- A driver doing a dev_put() in its ndo_uninit() MUST also do a dev_hold() in its ndo_init(), only when ndo_init() is returning 0.
Otherwise, register_netdevice() would call ndo_uninit() in its error path and release a refcount too soon.
Therefore, we need to move dev_hold() call from vti6_tnl_create2() to vti6_dev_init_gen()
[1] WARNING: CPU: 0 PID: 15951 at lib/refcount.c:31 refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31 Modules linked in: CPU: 0 PID: 15951 Comm: syz-executor.3 Not tainted 5.12.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31 Code: 1d 6a 5a e8 09 31 ff 89 de e8 8d 1a ab fd 84 db 75 e0 e8 d4 13 ab fd 48 c7 c7 a0 e1 c1 89 c6 05 4a 5a e8 09 01 e8 2e 36 fb 04 <0f> 0b eb c4 e8 b8 13 ab fd 0f b6 1d 39 5a e8 09 31 ff 89 de e8 58 RSP: 0018:ffffc90001eaef28 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000040000 RSI: ffffffff815c51f5 RDI: fffff520003d5dd7 RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815bdf8e R11: 0000000000000000 R12: ffff88801bb1c568 R13: ffff88801f69e800 R14: 00000000ffffffff R15: ffff888050889d40 FS: 00007fc79314e700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1c1ff47108 CR3: 0000000020fd5000 CR4: 00000000001506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __refcount_dec include/linux/refcount.h:344 [inline] refcount_dec include/linux/refcount.h:359 [inline] dev_put include/linux/netdevice.h:4135 [inline] vti6_dev_uninit+0x31a/0x360 net/ipv6/ip6_vti.c:297 register_netdevice+0xadf/0x1500 net/core/dev.c:10308 vti6_tnl_create2+0x1b5/0x400 net/ipv6/ip6_vti.c:190 vti6_newlink+0x9d/0xd0 net/ipv6/ip6_vti.c:1020 __rtnl_newlink+0x1062/0x1710 net/core/rtnetlink.c:3443 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3491 rtnetlink_rcv_msg+0x44e/0xad0 net/core/rtnetlink.c:5553 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2502 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:674 ____sys_sendmsg+0x331/0x810 net/socket.c:2350 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404 __sys_sendmmsg+0x195/0x470 net/socket.c:2490 __do_sys_sendmmsg net/socket.c:2519 [inline] __se_sys_sendmmsg net/socket.c:2516 [inline] __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2516
Signed-off-by: Eric Dumazet edumazet@google.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv6/ip6_vti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 94f16e82a4581..defa04b38ee8d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -196,7 +196,6 @@ static int vti6_tnl_create2(struct net_device *dev)
strcpy(t->parms.name, dev->name);
- dev_hold(dev); vti6_tnl_link(ip6n, t);
return 0; @@ -925,6 +924,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev) dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + dev_hold(dev); return 0; }
From: Miklos Szeredi mszeredi@redhat.com
stable inclusion from linux-4.19.191 commit 2bb2ff46d21d6794fd298df177199a2bb3b98434
--------------------------------
[ Upstream commit 8217673d07256b22881127bf50dce874d0e51653 ]
For cloned connections cuse_channel_release() will be called more than once, resulting in use after free.
Prevent device cloning for CUSE, which does not make sense at this point, and highly unlikely to be used in real life.
Signed-off-by: Miklos Szeredi mszeredi@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/fuse/cuse.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index f057c213c453a..e10e2b62ccf45 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -621,6 +621,8 @@ static int __init cuse_init(void) cuse_channel_fops.owner = THIS_MODULE; cuse_channel_fops.open = cuse_channel_open; cuse_channel_fops.release = cuse_channel_release; + /* CUSE is not prepared for FUSE_DEV_IOC_CLONE */ + cuse_channel_fops.unlocked_ioctl = NULL;
cuse_class = class_create(THIS_MODULE, "cuse"); if (IS_ERR(cuse_class))
From: "Gustavo A. R. Silva" gustavoars@kernel.org
stable inclusion from linux-4.19.191 commit b51e7468116bdf3c31424bf1623b890300d828a7
--------------------------------
[ Upstream commit e5272ad4aab347dde5610c0aedb786219e3ff793 ]
Fix the following out-of-bounds warning:
net/sctp/sm_make_chunk.c:3150:4: warning: 'memcpy' offset [17, 28] from the object at 'addr' is out of the bounds of referenced subobject 'v4' with type 'struct sockaddr_in' at offset 0 [-Warray-bounds]
This helps with the ongoing efforts to globally enable -Warray-bounds and get us closer to being able to tighten the FORTIFY_SOURCE routines on memcpy().
Link: https://github.com/KSPP/linux/issues/109 Reported-by: kernel test robot lkp@intel.com Signed-off-by: Gustavo A. R. Silva gustavoars@kernel.org Reviewed-by: Kees Cook keescook@chromium.org Acked-by: Marcelo Ricardo Leitner marcelo.leitner@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/sctp/sm_make_chunk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ce6053be60bc8..dc51e14f568ee 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3148,7 +3148,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, * primary. */ if (af->is_any(&addr)) - memcpy(&addr.v4, sctp_source(asconf), sizeof(addr)); + memcpy(&addr, sctp_source(asconf), sizeof(addr));
if (security_sctp_bind_connect(asoc->ep->base.sk, SCTP_PARAM_SET_PRIMARY,
From: "Gustavo A. R. Silva" gustavoars@kernel.org
stable inclusion from linux-4.19.191 commit b1352870018fa0cff265ff1357434195e8481560
--------------------------------
[ Upstream commit c1d9e34e11281a8ba1a1c54e4db554232a461488 ]
Fix the following out-of-bounds warning:
net/ethtool/ioctl.c:492:2: warning: 'memcpy' offset [49, 84] from the object at 'link_usettings' is out of the bounds of referenced subobject 'base' with type 'struct ethtool_link_settings' at offset 0 [-Warray-bounds]
The problem is that the original code is trying to copy data into a some struct members adjacent to each other in a single call to memcpy(). This causes a legitimate compiler warning because memcpy() overruns the length of &link_usettings.base. Fix this by directly using &link_usettings and _from_ as destination and source addresses, instead.
This helps with the ongoing efforts to globally enable -Warray-bounds and get us closer to being able to tighten the FORTIFY_SOURCE routines on memcpy().
Link: https://github.com/KSPP/linux/issues/109 Reported-by: kernel test robot lkp@intel.com Signed-off-by: Gustavo A. R. Silva gustavoars@kernel.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1011625a0ca4b..83028017c26dd 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -618,7 +618,7 @@ store_link_ksettings_for_user(void __user *to, { struct ethtool_link_usettings link_usettings;
- memcpy(&link_usettings.base, &from->base, sizeof(link_usettings)); + memcpy(&link_usettings, from, sizeof(link_usettings)); bitmap_to_arr32(link_usettings.link_modes.supported, from->link_modes.supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
From: Dmitry Baryshkov dmitry.baryshkov@linaro.org
stable inclusion from linux-4.19.191 commit 691062feb4ed8303be75ab07c5c1e09311bd8c80
--------------------------------
[ Upstream commit c99e755a4a4c165cad6effb39faffd0f3377c02d ]
In pci_scan_device(), if pci_setup_device() fails for any reason, the code will not release device's of_node by calling pci_release_of_node(). Fix that by calling the release function.
Fixes: 98d9f30c820d ("pci/of: Match PCI devices to OF nodes dynamically") Link: https://lore.kernel.org/r/20210124232826.1879-1-dmitry.baryshkov@linaro.org Signed-off-by: Dmitry Baryshkov dmitry.baryshkov@linaro.org Signed-off-by: Bjorn Helgaas bhelgaas@google.com Reviewed-by: Leon Romanovsky leonro@nvidia.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/pci/probe.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 2fae1bc5bf884..3731d17e1af1d 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2779,6 +2779,7 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) pci_set_of_node(dev);
if (pci_setup_device(dev)) { + pci_release_of_node(dev); pci_bus_put(dev->bus); kfree(dev); return NULL;
From: Trond Myklebust trond.myklebust@hammerspace.com
stable inclusion from linux-4.19.191 commit e5370bd9e419fcac4f8cf7b242455ba121212037
--------------------------------
[ Upstream commit 99f23783224355e7022ceea9b8d9f62c0fd01bd8 ]
Whether we're allocating or delallocating space, we should flush out the pending writes in order to avoid races with attribute updates.
Fixes: 1e564d3dbd68 ("NFSv4.2: Fix a race in nfs42_proc_deallocate()") Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/nfs42proc.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 526441de89c1d..da7b73ad811f2 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -59,7 +59,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, loff_t offset, loff_t len) { - struct nfs_server *server = NFS_SERVER(file_inode(filep)); + struct inode *inode = file_inode(filep); + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; struct nfs_lock_context *lock; int err; @@ -68,9 +69,13 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, if (IS_ERR(lock)) return PTR_ERR(lock);
- exception.inode = file_inode(filep); + exception.inode = inode; exception.state = lock->open_context->state;
+ err = nfs_sync_inode(inode); + if (err) + goto out; + do { err = _nfs42_proc_fallocate(msg, filep, lock, offset, len); if (err == -ENOTSUPP) { @@ -79,7 +84,7 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, } err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); - +out: nfs_put_lock_context(lock); return err; } @@ -117,16 +122,13 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) return -EOPNOTSUPP;
inode_lock(inode); - err = nfs_sync_inode(inode); - if (err) - goto out_unlock;
err = nfs42_proc_fallocate(&msg, filep, offset, len); if (err == 0) truncate_pagecache_range(inode, offset, (offset + len) -1); if (err == -EOPNOTSUPP) NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE; -out_unlock: + inode_unlock(inode); return err; }
From: Trond Myklebust trond.myklebust@hammerspace.com
stable inclusion from linux-4.19.191 commit e6681459e47a09fe61eac157aff93f2326d64025
--------------------------------
[ Upstream commit 9fdbfad1777cb4638f489eeb62d85432010c0031 ]
We need to use unsigned long subtraction and then convert to signed in order to deal correcly with C overflow rules.
Fixes: f5062003465c ("NFS: Set an attribute barrier on all updates") Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 905041eb24fee..c9670d822e1ca 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1609,10 +1609,10 @@ EXPORT_SYMBOL_GPL(_nfs_display_fhandle); */ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr) { - const struct nfs_inode *nfsi = NFS_I(inode); + unsigned long attr_gencount = NFS_I(inode)->attr_gencount;
- return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || - ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); + return (long)(fattr->gencount - attr_gencount) > 0 || + (long)(attr_gencount - nfs_read_attr_generation_counter()) > 0; }
static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) @@ -2042,7 +2042,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->attrtimeo_timestamp = now; } /* Set the barrier to be more recent than this fattr */ - if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) + if ((long)(fattr->gencount - nfsi->attr_gencount) > 0) nfsi->attr_gencount = fattr->gencount; }
From: Nikola Livic nlivic@gmail.com
stable inclusion from linux-4.19.191 commit f27638a92f77d8107efbaf48a0d3bfa24da8cdad
--------------------------------
[ Upstream commit ed34695e15aba74f45247f1ee2cf7e09d449f925 ]
We (adam zabrocki, alexander matrosov, alexander tereshkin, maksym bazalii) observed the check:
if (fh->size > sizeof(struct nfs_fh))
should not use the size of the nfs_fh struct which includes an extra two bytes from the size field.
struct nfs_fh { unsigned short size; unsigned char data[NFS_MAXFHSIZE]; }
but should determine the size from data[NFS_MAXFHSIZE] so the memcpy will not write 2 bytes beyond destination. The proposed fix is to compare against the NFS_MAXFHSIZE directly, as is done elsewhere in fs code base.
Fixes: d67ae825a59d ("pnfs/flexfiles: Add the FlexFile Layout Driver") Signed-off-by: Nikola Livic nlivic@gmail.com Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index e0fe9a0f1bf18..81843dd8b0ee9 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -101,7 +101,7 @@ static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh) if (unlikely(!p)) return -ENOBUFS; fh->size = be32_to_cpup(p++); - if (fh->size > sizeof(struct nfs_fh)) { + if (fh->size > NFS_MAXFHSIZE) { printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n", fh->size); return -EOVERFLOW;
From: Olga Kornievskaia kolga@netapp.com
stable inclusion from linux-4.19.191 commit 28a55a8a49f4ae334a8a6d4c1ad429ff7978bb4f
--------------------------------
[ Upstream commit 73f5c88f521a630ea1628beb9c2d48a2e777a419 ]
Currently the client ignores the value of the sr_eof of the SEEK operation. According to the spec, if the server didn't find the requested extent and reached the end of the file, the server would return sr_eof=true. In case the request for DATA and no data was found (ie in the middle of the hole), then the lseek expects that ENXIO would be returned.
Fixes: 1c6dcbe5ceff8 ("NFS: Implement SEEK") Signed-off-by: Olga Kornievskaia kolga@netapp.com Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/nfs42proc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index da7b73ad811f2..be252795a6f70 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -500,7 +500,10 @@ static loff_t _nfs42_proc_llseek(struct file *filep, if (status) return status;
- return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); + if (whence == SEEK_DATA && res.sr_eof) + return -NFS4ERR_NXIO; + else + return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); }
loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
From: Xin Long lucien.xin@gmail.com
stable inclusion from linux-4.19.191 commit d624f2991b977821375fbd56c91b0c91d456a697
--------------------------------
[ Upstream commit 35b4f24415c854cd718ccdf38dbea6297f010aae ]
There's a panic that occurs in a few of envs, the call trace is as below:
[] general protection fault, ... 0x29acd70f1000a: 0000 [#1] SMP PTI [] RIP: 0010:sctp_ulpevent_notify_peer_addr_change+0x4b/0x1fa [sctp] [] sctp_assoc_control_transport+0x1b9/0x210 [sctp] [] sctp_do_8_2_transport_strike.isra.16+0x15c/0x220 [sctp] [] sctp_cmd_interpreter.isra.21+0x1231/0x1a10 [sctp] [] sctp_do_sm+0xc3/0x2a0 [sctp] [] sctp_generate_timeout_event+0x81/0xf0 [sctp]
This is caused by a transport use-after-free issue. When processing a duplicate COOKIE-ECHO chunk in sctp_sf_do_dupcook_a(), both COOKIE-ACK and SHUTDOWN chunks are allocated with the transort from the new asoc. However, later in the sideeffect machine, the old asoc is used to send them out and old asoc's shutdown_last_sent_to is set to the transport that SHUTDOWN chunk attached to in sctp_cmd_setup_t2(), which actually belongs to the new asoc. After the new_asoc is freed and the old asoc T2 timeout, the old asoc's shutdown_last_sent_to that is already freed would be accessed in sctp_sf_t2_timer_expire().
Thanks Alexander and Jere for helping dig into this issue.
To fix it, this patch is to do the asoc update first, then allocate the COOKIE-ACK and SHUTDOWN chunks with the 'updated' old asoc. This would make more sense, as a chunk from an asoc shouldn't be sent out with another asoc. We had fixed quite a few issues caused by this.
Fixes: 145cb2f7177d ("sctp: Fix bundling of SHUTDOWN with COOKIE-ACK") Reported-by: Alexander Sverdlin alexander.sverdlin@nokia.com Reported-by: syzbot+bbe538efd1046586f587@syzkaller.appspotmail.com Reported-by: Michal Tesar mtesar@redhat.com Signed-off-by: Xin Long lucien.xin@gmail.com Acked-by: Marcelo Ricardo Leitner marcelo.leitner@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/sctp/sm_statefuns.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index b9f7250b5c459..e03754f5f185e 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1856,20 +1856,35 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_ASCONF_QUEUE, SCTP_NULL());
- repl = sctp_make_cookie_ack(new_asoc, chunk); + /* Update the content of current association. */ + if (sctp_assoc_update((struct sctp_association *)asoc, new_asoc)) { + struct sctp_chunk *abort; + + abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr)); + if (abort) { + sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + } + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, + SCTP_PERR(SCTP_ERROR_RSRC_LOW)); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); + goto nomem; + } + + repl = sctp_make_cookie_ack(asoc, chunk); if (!repl) goto nomem;
/* Report association restart to upper layer. */ ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0, - new_asoc->c.sinit_num_ostreams, - new_asoc->c.sinit_max_instreams, + asoc->c.sinit_num_ostreams, + asoc->c.sinit_max_instreams, NULL, GFP_ATOMIC); if (!ev) goto nomem_ev;
- /* Update the content of current association. */ - sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); if ((sctp_state(asoc, SHUTDOWN_PENDING) || sctp_state(asoc, SHUTDOWN_SENT)) &&
From: Xin Long lucien.xin@gmail.com
stable inclusion from linux-4.19.191 commit 80ff006023cc5f1430a73063ec4de415d82df424
--------------------------------
[ Upstream commit f282df0391267fb2b263da1cc3233aa6fb81defc ]
Normally SCTP_MIB_CURRESTAB is always incremented once asoc enter into ESTABLISHED from the state < ESTABLISHED and decremented when the asoc is being deleted.
However, in sctp_sf_do_dupcook_b(), the asoc's state can be changed to ESTABLISHED from the state >= ESTABLISHED where it shouldn't increment SCTP_MIB_CURRESTAB. Otherwise, one asoc may increment MIB_CURRESTAB multiple times but only decrement once at the end.
I was able to reproduce it by using scapy to do the 4-way shakehands, after that I replayed the COOKIE-ECHO chunk with 'peer_vtag' field changed to different values, and SCTP_MIB_CURRESTAB was incremented multiple times and never went back to 0 even when the asoc was freed.
This patch is to fix it by only incrementing SCTP_MIB_CURRESTAB when the state < ESTABLISHED in sctp_sf_do_dupcook_b().
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Marcelo Ricardo Leitner marcelo.leitner@gmail.com Signed-off-by: Xin Long lucien.xin@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/sctp/sm_statefuns.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index e03754f5f185e..19bd14a4eb07e 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1948,7 +1948,8 @@ static enum sctp_disposition sctp_sf_do_dupcook_b( sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); + if (asoc->state < SCTP_STATE_ESTABLISHED) + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
repl = sctp_make_cookie_ack(new_asoc, chunk);
From: Pablo Neira Ayuso pablo@netfilter.org
stable inclusion from linux-4.19.191 commit 20bff2f8c3e47d17214126e39db88747f7fd7399
--------------------------------
[ Upstream commit c7d13358b6a2f49f81a34aa323a2d0878a0532a2 ]
This extension breaks when trying to delete rules, add a new revision to fix this.
Fixes: 5e6874cdb8de ("[SECMARK]: Add xtables SECMARK target") Signed-off-by: Phil Sutter phil@nwl.cc Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/uapi/linux/netfilter/xt_SECMARK.h | 6 ++ net/netfilter/xt_SECMARK.c | 88 ++++++++++++++++++----- 2 files changed, 75 insertions(+), 19 deletions(-)
diff --git a/include/uapi/linux/netfilter/xt_SECMARK.h b/include/uapi/linux/netfilter/xt_SECMARK.h index 1f2a708413f5d..beb2cadba8a9c 100644 --- a/include/uapi/linux/netfilter/xt_SECMARK.h +++ b/include/uapi/linux/netfilter/xt_SECMARK.h @@ -20,4 +20,10 @@ struct xt_secmark_target_info { char secctx[SECMARK_SECCTX_MAX]; };
+struct xt_secmark_target_info_v1 { + __u8 mode; + char secctx[SECMARK_SECCTX_MAX]; + __u32 secid; +}; + #endif /*_XT_SECMARK_H_target */ diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index f16202d26c205..25d2ac88e6287 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -30,10 +30,9 @@ MODULE_ALIAS("ip6t_SECMARK"); static u8 mode;
static unsigned int -secmark_tg(struct sk_buff *skb, const struct xt_action_param *par) +secmark_tg(struct sk_buff *skb, const struct xt_secmark_target_info_v1 *info) { u32 secmark = 0; - const struct xt_secmark_target_info *info = par->targinfo;
switch (mode) { case SECMARK_MODE_SEL: @@ -47,7 +46,7 @@ secmark_tg(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; }
-static int checkentry_lsm(struct xt_secmark_target_info *info) +static int checkentry_lsm(struct xt_secmark_target_info_v1 *info) { int err;
@@ -79,15 +78,15 @@ static int checkentry_lsm(struct xt_secmark_target_info *info) return 0; }
-static int secmark_tg_check(const struct xt_tgchk_param *par) +static int +secmark_tg_check(const char *table, struct xt_secmark_target_info_v1 *info) { - struct xt_secmark_target_info *info = par->targinfo; int err;
- if (strcmp(par->table, "mangle") != 0 && - strcmp(par->table, "security") != 0) { + if (strcmp(table, "mangle") != 0 && + strcmp(table, "security") != 0) { pr_info_ratelimited("only valid in 'mangle' or 'security' table, not '%s'\n", - par->table); + table); return -EINVAL; }
@@ -122,25 +121,76 @@ static void secmark_tg_destroy(const struct xt_tgdtor_param *par) } }
-static struct xt_target secmark_tg_reg __read_mostly = { - .name = "SECMARK", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = secmark_tg_check, - .destroy = secmark_tg_destroy, - .target = secmark_tg, - .targetsize = sizeof(struct xt_secmark_target_info), - .me = THIS_MODULE, +static int secmark_tg_check_v0(const struct xt_tgchk_param *par) +{ + struct xt_secmark_target_info *info = par->targinfo; + struct xt_secmark_target_info_v1 newinfo = { + .mode = info->mode, + }; + int ret; + + memcpy(newinfo.secctx, info->secctx, SECMARK_SECCTX_MAX); + + ret = secmark_tg_check(par->table, &newinfo); + info->secid = newinfo.secid; + + return ret; +} + +static unsigned int +secmark_tg_v0(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_secmark_target_info *info = par->targinfo; + struct xt_secmark_target_info_v1 newinfo = { + .secid = info->secid, + }; + + return secmark_tg(skb, &newinfo); +} + +static int secmark_tg_check_v1(const struct xt_tgchk_param *par) +{ + return secmark_tg_check(par->table, par->targinfo); +} + +static unsigned int +secmark_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + return secmark_tg(skb, par->targinfo); +} + +static struct xt_target secmark_tg_reg[] __read_mostly = { + { + .name = "SECMARK", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = secmark_tg_check_v0, + .destroy = secmark_tg_destroy, + .target = secmark_tg_v0, + .targetsize = sizeof(struct xt_secmark_target_info), + .me = THIS_MODULE, + }, + { + .name = "SECMARK", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = secmark_tg_check_v1, + .destroy = secmark_tg_destroy, + .target = secmark_tg_v1, + .targetsize = sizeof(struct xt_secmark_target_info_v1), + .usersize = offsetof(struct xt_secmark_target_info_v1, secid), + .me = THIS_MODULE, + }, };
static int __init secmark_tg_init(void) { - return xt_register_target(&secmark_tg_reg); + return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg)); }
static void __exit secmark_tg_exit(void) { - xt_unregister_target(&secmark_tg_reg); + xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg)); }
module_init(secmark_tg_init);
From: Miaohe Lin linmiaohe@huawei.com
stable inclusion from linux-4.19.191 commit fdacfd77695f8c279bc7334cd1bea50c28bf4729
--------------------------------
[ Upstream commit 74e579bf231a337ab3786d59e64bc94f45ca7b3f ]
In writable and !referenced case, the result value should be SCAN_LACK_REFERENCED_PAGE for trace_mm_collapse_huge_page_isolate() instead of default 0 (SCAN_FAIL) here.
Link: https://lkml.kernel.org/r/20210306032947.35921-5-linmiaohe@huawei.com Fixes: 7d2eba0557c1 ("mm: add tracepoint for scanning pages") Signed-off-by: Miaohe Lin linmiaohe@huawei.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Dan Carpenter dan.carpenter@oracle.com Cc: Ebru Akagunduz ebru.akagunduz@gmail.com Cc: Mike Kravetz mike.kravetz@oracle.com Cc: Rik van Riel riel@redhat.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/khugepaged.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 669404342fbe6..419d1c1f0fbd9 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -616,17 +616,17 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, mmu_notifier_test_young(vma->vm_mm, address)) referenced++; } - if (likely(writable)) { - if (likely(referenced)) { - result = SCAN_SUCCEED; - trace_mm_collapse_huge_page_isolate(page, none_or_zero, - referenced, writable, result); - return 1; - } - } else { + + if (unlikely(!writable)) { result = SCAN_PAGE_RO; + } else if (unlikely(!referenced)) { + result = SCAN_LACK_REFERENCED_PAGE; + } else { + result = SCAN_SUCCEED; + trace_mm_collapse_huge_page_isolate(page, none_or_zero, + referenced, writable, result); + return 1; } - out: release_pte_pages(pte, _pte); trace_mm_collapse_huge_page_isolate(page, none_or_zero,
From: Miaohe Lin linmiaohe@huawei.com
stable inclusion from linux-4.19.191 commit 2e8b30d7f8b5f55539659039a5e1ae2803002a22
--------------------------------
[ Upstream commit da56388c4397878a65b74f7fe97760f5aa7d316b ]
A rare out of memory error would prevent removal of the reserve map region for a page. hugetlb_fix_reserve_counts() handles this rare case to avoid dangling with incorrect counts. Unfortunately, hugepage_subpool_get_pages and hugetlb_acct_memory could possibly fail too. We should correctly handle these cases.
Link: https://lkml.kernel.org/r/20210410072348.20437-5-linmiaohe@huawei.com Fixes: b5cec28d36f5 ("hugetlbfs: truncate_hugepages() takes a range of pages") Signed-off-by: Miaohe Lin linmiaohe@huawei.com Cc: Feilong Lin linfeilong@huawei.com Cc: Mike Kravetz mike.kravetz@oracle.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/hugetlb.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 921aca52726d4..726a3aa9d19d1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -590,13 +590,20 @@ void hugetlb_fix_reserve_counts(struct inode *inode) { struct hugepage_subpool *spool = subpool_inode(inode); long rsv_adjust; + bool reserved = false;
rsv_adjust = hugepage_subpool_get_pages(spool, 1); - if (rsv_adjust) { + if (rsv_adjust > 0) { struct hstate *h = hstate_inode(inode);
- hugetlb_acct_memory(h, 1); + if (!hugetlb_acct_memory(h, 1)) + reserved = true; + } else if (!rsv_adjust) { + reserved = true; } + + if (!reserved) + pr_warn("hugetlb: Huge Page Reserved count may go negative.\n"); }
/*
From: Miaohe Lin linmiaohe@huawei.com
stable inclusion from linux-4.19.191 commit 3a794e45d90bca72ee8aee5336a7826470493d0e
--------------------------------
[ Upstream commit c89a384e2551c692a9fe60d093fd7080f50afc51 ]
When removing rmap_item from stable tree, STABLE_FLAG of rmap_item is cleared with head reserved. So the following scenario might happen: For ksm page with rmap_item1:
cmp_and_merge_page stable_node->head = &migrate_nodes; remove_rmap_item_from_tree, but head still equal to stable_node; try_to_merge_with_ksm_page failed; return;
For the same ksm page with rmap_item2, stable node migration succeed this time. The stable_node->head does not equal to migrate_nodes now. For ksm page with rmap_item1 again:
cmp_and_merge_page stable_node->head != &migrate_nodes && rmap_item->head == stable_node return;
We would miss the rmap_item for stable_node and might result in failed rmap_walk_ksm(). Fix this by set rmap_item->head to NULL when rmap_item is removed from stable tree.
Link: https://lkml.kernel.org/r/20210330140228.45635-5-linmiaohe@huawei.com Fixes: 4146d2d673e8 ("ksm: make !merge_across_nodes migration safe") Signed-off-by: Miaohe Lin linmiaohe@huawei.com Cc: Hugh Dickins hughd@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/ksm.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/mm/ksm.c b/mm/ksm.c index edd91af5c5448..9749729a5381a 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -793,6 +793,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item) stable_node->rmap_hlist_len--;
put_anon_vma(rmap_item->anon_vma); + rmap_item->head = NULL; rmap_item->address &= PAGE_MASK;
} else if (rmap_item->address & UNSTABLE_FLAG) {
From: Maciej Żenczykowski maze@google.com
stable inclusion from linux-4.19.191 commit 4f21d7eb214b4751b8d285a248bbc26439fd407b
--------------------------------
[ Upstream commit 2c16db6c92b0ee4aa61e88366df82169e83c3f7e ]
Android userspace has been using TCA_KIND with a char[IFNAMESIZ] many-null-terminated buffer containing the string 'bpf'.
This works on 4.19 and ceases to work on 5.10.
I'm not entirely sure what fixes tag to use, but I think the issue was likely introduced in the below mentioned 5.4 commit.
Reported-by: Nucca Chen nuccachen@google.com Cc: Cong Wang xiyou.wangcong@gmail.com Cc: David Ahern dsahern@gmail.com Cc: David S. Miller davem@davemloft.net Cc: Jakub Kicinski jakub.kicinski@netronome.com Cc: Jamal Hadi Salim jhs@mojatatu.com Cc: Jiri Pirko jiri@mellanox.com Cc: Jiri Pirko jiri@resnulli.us Fixes: 62794fc4fbf5 ("net_sched: add max len check for TCA_KIND") Change-Id: I66dc281f165a2858fc29a44869a270a2d698a82b Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- lib/nlattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/nlattr.c b/lib/nlattr.c index e335bcafa9e4c..00bfc6aece055 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -402,7 +402,7 @@ int nla_strcmp(const struct nlattr *nla, const char *str) int attrlen = nla_len(nla); int d;
- if (attrlen > 0 && buf[attrlen - 1] == '\0') + while (attrlen > 0 && buf[attrlen - 1] == '\0') attrlen--;
d = attrlen - len;
From: Pablo Neira Ayuso pablo@netfilter.org
stable inclusion from linux-4.19.191 commit e11924b7513ceacd7e9a0ed9f8db60629eefbf12
--------------------------------
[ Upstream commit 5e024c325406470d1165a09c6feaf8ec897936be ]
Do not assume that the tcph->doff field is correct when parsing for TCP options, skb_header_pointer() might fail to fetch these bits.
Fixes: 11eeef41d5f6 ("netfilter: passive OS fingerprint xtables match") Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/netfilter/nfnetlink_osf.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 131f9f8c0b092..917f06110c823 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -191,6 +191,8 @@ static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx,
ctx->optp = skb_header_pointer(skb, ip_hdrlen(skb) + sizeof(struct tcphdr), ctx->optsize, opts); + if (!ctx->optp) + return NULL; }
return tcp;
From: Odin Ugedal odin@uged.al
stable inclusion from linux-4.19.191 commit 434ea8c1d1bf296a2597aeb28f6ccf62ae82f235
--------------------------------
[ Upstream commit 0258bdfaff5bd13c4d2383150b7097aecd6b6d82 ]
This fixes an issue where old load on a cfs_rq is not properly decayed, resulting in strange behavior where fairness can decrease drastically. Real workloads with equally weighted control groups have ended up getting a respective 99% and 1%(!!) of cpu time.
When an idle task is attached to a cfs_rq by attaching a pid to a cgroup, the old load of the task is attached to the new cfs_rq and sched_entity by attach_entity_cfs_rq. If the task is then moved to another cpu (and therefore cfs_rq) before being enqueued/woken up, the load will be moved to cfs_rq->removed from the sched_entity. Such a move will happen when enforcing a cpuset on the task (eg. via a cgroup) that force it to move.
The load will however not be removed from the task_group itself, making it look like there is a constant load on that cfs_rq. This causes the vruntime of tasks on other sibling cfs_rq's to increase faster than they are supposed to; causing severe fairness issues. If no other task is started on the given cfs_rq, and due to the cpuset it would not happen, this load would never be properly unloaded. With this patch the load will be properly removed inside update_blocked_averages. This also applies to tasks moved to the fair scheduling class and moved to another cpu, and this path will also fix that. For fork, the entity is queued right away, so this problem does not affect that.
This applies to cases where the new process is the first in the cfs_rq, issue introduced 3d30544f0212 ("sched/fair: Apply more PELT fixes"), and when there has previously been load on the cgroup but the cgroup was removed from the leaflist due to having null PELT load, indroduced in 039ae8bcf7a5 ("sched/fair: Fix O(nr_cgroups) in the load balancing path").
For a simple cgroup hierarchy (as seen below) with two equally weighted groups, that in theory should get 50/50 of cpu time each, it often leads to a load of 60/40 or 70/30.
parent/ cg-1/ cpu.weight: 100 cpuset.cpus: 1 cg-2/ cpu.weight: 100 cpuset.cpus: 1
If the hierarchy is deeper (as seen below), while keeping cg-1 and cg-2 equally weighted, they should still get a 50/50 balance of cpu time. This however sometimes results in a balance of 10/90 or 1/99(!!) between the task groups.
$ ps u -C stress USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND root 18568 1.1 0.0 3684 100 pts/12 R+ 13:36 0:00 stress --cpu 1 root 18580 99.3 0.0 3684 100 pts/12 R+ 13:36 0:09 stress --cpu 1
parent/ cg-1/ cpu.weight: 100 sub-group/ cpu.weight: 1 cpuset.cpus: 1 cg-2/ cpu.weight: 100 sub-group/ cpu.weight: 10000 cpuset.cpus: 1
This can be reproduced by attaching an idle process to a cgroup and moving it to a given cpuset before it wakes up. The issue is evident in many (if not most) container runtimes, and has been reproduced with both crun and runc (and therefore docker and all its "derivatives"), and with both cgroup v1 and v2.
Fixes: 3d30544f0212 ("sched/fair: Apply more PELT fixes") Fixes: 039ae8bcf7a5 ("sched/fair: Fix O(nr_cgroups) in the load balancing path") Signed-off-by: Odin Ugedal odin@uged.al Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Reviewed-by: Vincent Guittot vincent.guittot@linaro.org Link: https://lkml.kernel.org/r/20210501141950.23622-2-odin@uged.al Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/sched/fair.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e8b111fb51c66..08d877b57c4f2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -10240,16 +10240,22 @@ static void propagate_entity_cfs_rq(struct sched_entity *se) { struct cfs_rq *cfs_rq;
+ list_add_leaf_cfs_rq(cfs_rq_of(se)); + /* Start to propagate at parent */ se = se->parent;
for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se);
- if (cfs_rq_throttled(cfs_rq)) - break; + if (!cfs_rq_throttled(cfs_rq)){ + update_load_avg(cfs_rq, se, UPDATE_TG); + list_add_leaf_cfs_rq(cfs_rq); + continue; + }
- update_load_avg(cfs_rq, se, UPDATE_TG); + if (list_add_leaf_cfs_rq(cfs_rq)) + break; } } #else
From: Jia-Ju Bai baijiaju1990@gmail.com
stable inclusion from linux-4.19.191 commit 71bafe8dfbd81558911e38376eefecbc26458b2c
--------------------------------
[ Upstream commit 31d82c2c787d5cf65fedd35ebbc0c1bd95c1a679 ]
When vzalloc() returns NULL to sha_regions, no error return code of kexec_calculate_store_digests() is assigned. To fix this bug, ret is assigned with -ENOMEM in this case.
Link: https://lkml.kernel.org/r/20210309083904.24321-1-baijiaju1990@gmail.com Fixes: a43cac0d9dc2 ("kexec: split kexec_file syscall code to kexec_file.c") Signed-off-by: Jia-Ju Bai baijiaju1990@gmail.com Reported-by: TOTE Robot oslab@tsinghua.edu.cn Acked-by: Baoquan He bhe@redhat.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/kexec_file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index c6a3b6851372c..04f99368bfb3d 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -626,8 +626,10 @@ static int kexec_calculate_store_digests(struct kimage *image)
sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region); sha_regions = vzalloc(sha_region_sz); - if (!sha_regions) + if (!sha_regions) { + ret = -ENOMEM; goto out_free_desc; + }
desc->tfm = tfm; desc->flags = 0;
From: Eric Dumazet edumazet@google.com
stable inclusion from linux-4.19.191 commit efcd730ddd6f25578bd31bfe703e593e2421d708
--------------------------------
[ Upstream commit a54754ec9891830ba548e2010c889e3c8146e449 ]
Number of buckets being stored in 32bit variables, we have to ensure that no overflows occur in nft_hash_buckets()
syzbot injected a size == 0x40000000 and reported:
UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 shift exponent 64 is too large for 64-bit type 'long unsigned int' CPU: 1 PID: 29539 Comm: syz-executor.4 Not tainted 5.12.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x141/0x1d7 lib/dump_stack.c:120 ubsan_epilogue+0xb/0x5a lib/ubsan.c:148 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:327 __roundup_pow_of_two include/linux/log2.h:57 [inline] nft_hash_buckets net/netfilter/nft_set_hash.c:411 [inline] nft_hash_estimate.cold+0x19/0x1e net/netfilter/nft_set_hash.c:652 nft_select_set_ops net/netfilter/nf_tables_api.c:3586 [inline] nf_tables_newset+0xe62/0x3110 net/netfilter/nf_tables_api.c:4322 nfnetlink_rcv_batch+0xa09/0x24b0 net/netfilter/nfnetlink.c:488 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:612 [inline] nfnetlink_rcv+0x3af/0x420 net/netfilter/nfnetlink.c:630 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:674 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
Fixes: 0ed6389c483d ("netfilter: nf_tables: rename set implementations") Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/netfilter/nft_set_hash.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 05118e03c3e48..dbc4ed643b4bc 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -392,9 +392,17 @@ static void nft_rhash_destroy(const struct nft_set *set) (void *)set); }
+/* Number of buckets is stored in u32, so cap our result to 1U<<31 */ +#define NFT_MAX_BUCKETS (1U << 31) + static u32 nft_hash_buckets(u32 size) { - return roundup_pow_of_two(size * 4 / 3); + u64 val = div_u64((u64)size * 4, 3); + + if (val >= NFT_MAX_BUCKETS) + return NFT_MAX_BUCKETS; + + return roundup_pow_of_two(val); }
static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features,
From: Axel Rasmussen axelrasmussen@google.com
stable inclusion from linux-4.19.191 commit 07c9b834c97d0fa3402fb7f3f3b32df370a6ff1f
--------------------------------
commit 7ed9d238c7dbb1fdb63ad96a6184985151b0171c upstream.
Consider the following sequence of events:
1. Userspace issues a UFFD ioctl, which ends up calling into shmem_mfill_atomic_pte(). We successfully account the blocks, we shmem_alloc_page(), but then the copy_from_user() fails. We return -ENOENT. We don't release the page we allocated. 2. Our caller detects this error code, tries the copy_from_user() after dropping the mmap_lock, and retries, calling back into shmem_mfill_atomic_pte(). 3. Meanwhile, let's say another process filled up the tmpfs being used. 4. So shmem_mfill_atomic_pte() fails to account blocks this time, and immediately returns - without releasing the page.
This triggers a BUG_ON in our caller, which asserts that the page should always be consumed, unless -ENOENT is returned.
To fix this, detect if we have such a "dangling" page when accounting fails, and if so, release it before returning.
Link: https://lkml.kernel.org/r/20210428230858.348400-1-axelrasmussen@google.com Fixes: cb658a453b93 ("userfaultfd: shmem: avoid leaking blocks and used blocks in UFFDIO_COPY") Signed-off-by: Axel Rasmussen axelrasmussen@google.com Reported-by: Hugh Dickins hughd@google.com Acked-by: Hugh Dickins hughd@google.com Reviewed-by: Peter Xu peterx@redhat.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/shmem.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/mm/shmem.c b/mm/shmem.c index b4be0be77327c..8a41ab86152c5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2361,8 +2361,18 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, pgoff_t offset, max_off;
ret = -ENOMEM; - if (!shmem_inode_acct_block(inode, 1)) + if (!shmem_inode_acct_block(inode, 1)) { + /* + * We may have got a page, returned -ENOENT triggering a retry, + * and now we find ourselves with -ENOMEM. Release the page, to + * avoid a BUG_ON in our caller. + */ + if (unlikely(*pagep)) { + put_page(*pagep); + *pagep = NULL; + } goto out; + }
if (!*pagep) { page = shmem_alloc_page(gfp, info, pgoff);
From: Bart Van Assche bvanassche@acm.org
stable inclusion from linux-4.19.191 commit 00b5bc6421739f6eba45eede245846394e0dd279
--------------------------------
[ Upstream commit 630ef623ed26c18a457cdc070cf24014e50129c2 ]
If a tag set is shared across request queues (e.g. SCSI LUNs) then the block layer core keeps track of the number of active request queues in tags->active_queues. blk_mq_tag_busy() and blk_mq_tag_idle() update that atomic counter if the hctx flag BLK_MQ_F_TAG_QUEUE_SHARED is set. Make sure that blk_mq_exit_queue() calls blk_mq_tag_idle() before that flag is cleared by blk_mq_del_queue_tag_set().
Cc: Christoph Hellwig hch@infradead.org Cc: Ming Lei ming.lei@redhat.com Cc: Hannes Reinecke hare@suse.com Fixes: 0d2602ca30e4 ("blk-mq: improve support for shared tags maps") Signed-off-by: Bart Van Assche bvanassche@acm.org Reviewed-by: Ming Lei ming.lei@redhat.com Link: https://lore.kernel.org/r/20210513171529.7977-1-bvanassche@acm.org Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- block/blk-mq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c index 4965023121b5a..607ee5dafa2e9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2816,10 +2816,12 @@ EXPORT_SYMBOL(blk_mq_init_allocated_queue); /* tags can _not_ be used after returning from blk_mq_exit_queue */ void blk_mq_exit_queue(struct request_queue *q) { - struct blk_mq_tag_set *set = q->tag_set; + struct blk_mq_tag_set *set = q->tag_set;
- blk_mq_del_queue_tag_set(q); + /* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */ blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); + /* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */ + blk_mq_del_queue_tag_set(q); }
static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.191 commit 1ea5cdd5388628a6e3762392717db0d980307ce8
--------------------------------
commit b4104180a2efb85f55e1ba1407885c9421970338 upstream.
syzbot can trigger the WARN() in init_uevent_argv() which isn't the nicest as the code does properly recover and handle the error. So change the WARN() call to pr_warn() and provide some more information on what the buffer size that was needed.
Link: https://lore.kernel.org/r/20201107082206.GA19079@kroah.com Cc: "Rafael J. Wysocki" rafael@kernel.org Cc: linux-kernel@vger.kernel.org Reported-by: syzbot+92340f7b2b4789907fdb@syzkaller.appspotmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Link: https://lore.kernel.org/r/20210405094852.1348499-1-gregkh@linuxfoundation.or... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- lib/kobject_uevent.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 7761f32943391..26d21339bef27 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -250,12 +250,13 @@ static int kobj_usermode_filter(struct kobject *kobj)
static int init_uevent_argv(struct kobj_uevent_env *env, const char *subsystem) { + int buffer_size = sizeof(env->buf) - env->buflen; int len;
- len = strlcpy(&env->buf[env->buflen], subsystem, - sizeof(env->buf) - env->buflen); - if (len >= (sizeof(env->buf) - env->buflen)) { - WARN(1, KERN_ERR "init_uevent_argv: buffer size too small\n"); + len = strlcpy(&env->buf[env->buflen], subsystem, buffer_size); + if (len >= buffer_size) { + pr_warn("init_uevent_argv: buffer size of %d too small, needed %d\n", + buffer_size, len); return -ENOMEM; }
From: Jonathon Reinhart jonathon.reinhart@gmail.com
stable inclusion from linux-4.19.191 commit 9b288479f7a901a14ce703938596438559d7df55
--------------------------------
commit 2671fa4dc0109d3fb581bc3078fdf17b5d9080f6 upstream.
These sysctls point to global variables: - NF_SYSCTL_CT_MAX (&nf_conntrack_max) - NF_SYSCTL_CT_EXPECT_MAX (&nf_ct_expect_max) - NF_SYSCTL_CT_BUCKETS (&nf_conntrack_htable_size_user)
Because their data pointers are not updated to point to per-netns structures, they must be marked read-only in a non-init_net ns. Otherwise, changes in any net namespace are reflected in (leaked into) all other net namespaces. This problem has existed since the introduction of net namespaces.
The current logic marks them read-only only if the net namespace is owned by an unprivileged user (other than init_user_ns).
Commit d0febd81ae77 ("netfilter: conntrack: re-visit sysctls in unprivileged namespaces") "exposes all sysctls even if the namespace is unpriviliged." Since we need to mark them readonly in any case, we can forego the unprivileged user check altogether.
Fixes: d0febd81ae77 ("netfilter: conntrack: re-visit sysctls in unprivileged namespaces") Signed-off-by: Jonathon Reinhart Jonathon.Reinhart@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/netfilter/nf_conntrack_standalone.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 2e3ae494f3697..da0c9fa381d20 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -594,8 +594,11 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) if (net->user_ns != &init_user_ns) table[0].procname = NULL;
- if (!net_eq(&init_net, net)) + if (!net_eq(&init_net, net)) { + table[0].mode = 0444; table[2].mode = 0444; + table[5].mode = 0444; + }
net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table); if (!net->ct.sysctl_header)
From: Christoph Hellwig hch@lst.de
stable inclusion from linux-4.19.191 commit a3dd6095f1a66fd05fe79c96fb9f7a3a13d0ca03
--------------------------------
commit 53fe2a30bc168db9700e00206d991ff934973cf1 upstream.
Do not call nvme_configure_apst when the controller is not live, given that nvme_configure_apst will fail due the lack of an admin queue when the controller is being torn down and nvme_set_latency_tolerance is called from dev_pm_qos_hide_latency_tolerance.
Fixes: 510a405d945b("nvme: fix memory leak for power latency tolerance") Reported-by: Peng Liu liupeng17@lenovo.com Signed-off-by: Christoph Hellwig hch@lst.de Reviewed-by: Keith Busch kbusch@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0e1ec6b2a2a6e..7e9823140766d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2180,7 +2180,8 @@ static void nvme_set_latency_tolerance(struct device *dev, s32 val)
if (ctrl->ps_max_latency_us != latency) { ctrl->ps_max_latency_us = latency; - nvme_configure_apst(ctrl); + if (ctrl->state == NVME_CTRL_LIVE) + nvme_configure_apst(ctrl); } }
From: Feilong Lin linfeilong@huawei.com
stable inclusion from linux-4.19.191 commit d82da045a5da9f889a6cee316cabeb2be29cb199
--------------------------------
[ Upstream commit 3bbfd319034ddce59e023837a4aa11439460509b ]
In enable_slot(), if pci_get_slot() returns NULL, we clear the SLOT_ENABLED flag. When pci_get_slot() finds a device, it increments the device's reference count. In this case, we did not call pci_dev_put() to decrement the reference count, so the memory of the device (struct pci_dev type) will eventually leak.
Call pci_dev_put() to decrement its reference count when pci_get_slot() returns a PCI device.
Link: https://lore.kernel.org/r/b411af88-5049-a1c6-83ac-d104a1f429be@huawei.com Signed-off-by: Feilong Lin linfeilong@huawei.com Signed-off-by: Zhiqiang Liu liuzhiqiang26@huawei.com Signed-off-by: Bjorn Helgaas bhelgaas@google.com Reviewed-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/pci/hotplug/acpiphp_glue.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index c94c135254479..a5401f96d9eaa 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -532,6 +532,7 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge) slot->flags &= ~SLOT_ENABLED; continue; } + pci_dev_put(dev); } }
From: Bodo Stroesser bostroesser@gmail.com
stable inclusion from linux-4.19.191 commit 84d29b2fb3caf42d0bb0907a0ba01fa111ff4791
--------------------------------
[ Upstream commit 9814b55cde0588b6d9bc496cee43f87316cbc6f1 ]
If tcmu_handle_completions() finds an invalid cmd_id while looping over cmd responses from userspace it sets TCMU_DEV_BIT_BROKEN and breaks the loop. This means that it does further handling for the tcmu device.
Skip that handling by replacing 'break' with 'return'.
Additionally change tcmu_handle_completions() from unsigned int to bool, since the value used in return already is bool.
Link: https://lore.kernel.org/r/20210423150123.24468-1-bostroesser@gmail.com Signed-off-by: Bodo Stroesser bostroesser@gmail.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/target/target_core_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 368d9f658b214..56f0cebc138bc 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1225,7 +1225,7 @@ static void tcmu_set_next_deadline(struct list_head *queue, del_timer(timer); }
-static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) +static bool tcmu_handle_completions(struct tcmu_dev *udev) { struct tcmu_mailbox *mb; struct tcmu_cmd *cmd; @@ -1258,7 +1258,7 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) pr_err("cmd_id %u not found, ring is broken\n", entry->hdr.cmd_id); set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags); - break; + return false; }
tcmu_handle_completion(cmd, entry);
From: yangerkun yangerkun@huawei.com
stable inclusion from linux-4.19.191 commit b9bcbc3c743c46bead89dffe4283ff7cd9e118bf
--------------------------------
[ Upstream commit cf7b39a0cbf6bf57aa07a008d46cf695add05b4c ]
We get a bug:
BUG: KASAN: slab-out-of-bounds in iov_iter_revert+0x11c/0x404 lib/iov_iter.c:1139 Read of size 8 at addr ffff0000d3fb11f8 by task
CPU: 0 PID: 12582 Comm: syz-executor.2 Not tainted 5.10.0-00843-g352c8610ccd2 #2 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x2d0 arch/arm64/kernel/stacktrace.c:132 show_stack+0x28/0x34 arch/arm64/kernel/stacktrace.c:196 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x110/0x164 lib/dump_stack.c:118 print_address_description+0x78/0x5c8 mm/kasan/report.c:385 __kasan_report mm/kasan/report.c:545 [inline] kasan_report+0x148/0x1e4 mm/kasan/report.c:562 check_memory_region_inline mm/kasan/generic.c:183 [inline] __asan_load8+0xb4/0xbc mm/kasan/generic.c:252 iov_iter_revert+0x11c/0x404 lib/iov_iter.c:1139 io_read fs/io_uring.c:3421 [inline] io_issue_sqe+0x2344/0x2d64 fs/io_uring.c:5943 __io_queue_sqe+0x19c/0x520 fs/io_uring.c:6260 io_queue_sqe+0x2a4/0x590 fs/io_uring.c:6326 io_submit_sqe fs/io_uring.c:6395 [inline] io_submit_sqes+0x4c0/0xa04 fs/io_uring.c:6624 __do_sys_io_uring_enter fs/io_uring.c:9013 [inline] __se_sys_io_uring_enter fs/io_uring.c:8960 [inline] __arm64_sys_io_uring_enter+0x190/0x708 fs/io_uring.c:8960 __invoke_syscall arch/arm64/kernel/syscall.c:36 [inline] invoke_syscall arch/arm64/kernel/syscall.c:48 [inline] el0_svc_common arch/arm64/kernel/syscall.c:158 [inline] do_el0_svc+0x120/0x290 arch/arm64/kernel/syscall.c:227 el0_svc+0x1c/0x28 arch/arm64/kernel/entry-common.c:367 el0_sync_handler+0x98/0x170 arch/arm64/kernel/entry-common.c:383 el0_sync+0x140/0x180 arch/arm64/kernel/entry.S:670
Allocated by task 12570: stack_trace_save+0x80/0xb8 kernel/stacktrace.c:121 kasan_save_stack mm/kasan/common.c:48 [inline] kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc+0xdc/0x120 mm/kasan/common.c:461 kasan_kmalloc+0xc/0x14 mm/kasan/common.c:475 __kmalloc+0x23c/0x334 mm/slub.c:3970 kmalloc include/linux/slab.h:557 [inline] __io_alloc_async_data+0x68/0x9c fs/io_uring.c:3210 io_setup_async_rw fs/io_uring.c:3229 [inline] io_read fs/io_uring.c:3436 [inline] io_issue_sqe+0x2954/0x2d64 fs/io_uring.c:5943 __io_queue_sqe+0x19c/0x520 fs/io_uring.c:6260 io_queue_sqe+0x2a4/0x590 fs/io_uring.c:6326 io_submit_sqe fs/io_uring.c:6395 [inline] io_submit_sqes+0x4c0/0xa04 fs/io_uring.c:6624 __do_sys_io_uring_enter fs/io_uring.c:9013 [inline] __se_sys_io_uring_enter fs/io_uring.c:8960 [inline] __arm64_sys_io_uring_enter+0x190/0x708 fs/io_uring.c:8960 __invoke_syscall arch/arm64/kernel/syscall.c:36 [inline] invoke_syscall arch/arm64/kernel/syscall.c:48 [inline] el0_svc_common arch/arm64/kernel/syscall.c:158 [inline] do_el0_svc+0x120/0x290 arch/arm64/kernel/syscall.c:227 el0_svc+0x1c/0x28 arch/arm64/kernel/entry-common.c:367 el0_sync_handler+0x98/0x170 arch/arm64/kernel/entry-common.c:383 el0_sync+0x140/0x180 arch/arm64/kernel/entry.S:670
Freed by task 12570: stack_trace_save+0x80/0xb8 kernel/stacktrace.c:121 kasan_save_stack mm/kasan/common.c:48 [inline] kasan_set_track+0x38/0x6c mm/kasan/common.c:56 kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:355 __kasan_slab_free+0x124/0x150 mm/kasan/common.c:422 kasan_slab_free+0x10/0x1c mm/kasan/common.c:431 slab_free_hook mm/slub.c:1544 [inline] slab_free_freelist_hook mm/slub.c:1577 [inline] slab_free mm/slub.c:3142 [inline] kfree+0x104/0x38c mm/slub.c:4124 io_dismantle_req fs/io_uring.c:1855 [inline] __io_free_req+0x70/0x254 fs/io_uring.c:1867 io_put_req_find_next fs/io_uring.c:2173 [inline] __io_queue_sqe+0x1fc/0x520 fs/io_uring.c:6279 __io_req_task_submit+0x154/0x21c fs/io_uring.c:2051 io_req_task_submit+0x2c/0x44 fs/io_uring.c:2063 task_work_run+0xdc/0x128 kernel/task_work.c:151 get_signal+0x6f8/0x980 kernel/signal.c:2562 do_signal+0x108/0x3a4 arch/arm64/kernel/signal.c:658 do_notify_resume+0xbc/0x25c arch/arm64/kernel/signal.c:722 work_pending+0xc/0x180
blkdev_read_iter can truncate iov_iter's count since the count + pos may exceed the size of the blkdev. This will confuse io_read that we have consume the iovec. And once we do the iov_iter_revert in io_read, we will trigger the slab-out-of-bounds. Fix it by reexpand the count with size has been truncated.
blkdev_write_iter can trigger the problem too.
Signed-off-by: yangerkun yangerkun@huawei.com Acked-by: Pavel Begunkov asml.silencec@gmail.com Link: https://lore.kernel.org/r/20210401071807.3328235-1-yangerkun@huawei.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/block_dev.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c index a90bfc36c6da2..786d105692e85 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -2003,6 +2003,7 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *bd_inode = bdev_file_inode(file); loff_t size = i_size_read(bd_inode); struct blk_plug plug; + size_t shorted = 0; ssize_t ret;
if (bdev_read_only(I_BDEV(bd_inode))) @@ -2021,12 +2022,17 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT) return -EOPNOTSUPP;
- iov_iter_truncate(from, size - iocb->ki_pos); + size -= iocb->ki_pos; + if (iov_iter_count(from) > size) { + shorted = iov_iter_count(from) - size; + iov_iter_truncate(from, size); + }
blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); if (ret > 0) ret = generic_write_sync(iocb, ret); + iov_iter_reexpand(from, iov_iter_count(from) + shorted); blk_finish_plug(&plug); return ret; } @@ -2038,13 +2044,21 @@ ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *bd_inode = bdev_file_inode(file); loff_t size = i_size_read(bd_inode); loff_t pos = iocb->ki_pos; + size_t shorted = 0; + ssize_t ret;
if (pos >= size) return 0;
size -= pos; - iov_iter_truncate(to, size); - return generic_file_read_iter(iocb, to); + if (iov_iter_count(to) > size) { + shorted = iov_iter_count(to) - size; + iov_iter_truncate(to, size); + } + + ret = generic_file_read_iter(iocb, to); + iov_iter_reexpand(to, iov_iter_count(to) + shorted); + return ret; } EXPORT_SYMBOL_GPL(blkdev_read_iter);
From: Eric Dumazet edumazet@google.com
stable inclusion from linux-4.19.191 commit 2dc72d6a7f0e8b2eff9f17959157cf50618bc1ca
--------------------------------
commit 7f700334be9aeb91d5d86ef9ad2d901b9b453e9b upstream.
After adopting CONFIG_PCPU_DEV_REFCNT=n option, syzbot was able to trigger a warning [1]
Issue here is that:
- all dev_put() should be paired with a corresponding dev_hold(), and vice versa.
- A driver doing a dev_put() in its ndo_uninit() MUST also do a dev_hold() in its ndo_init(), only when ndo_init() is returning 0.
Otherwise, register_netdevice() would call ndo_uninit() in its error path and release a refcount too soon.
ip6_gre for example (among others problematic drivers) has to use dev_hold() in ip6gre_tunnel_init_common() instead of from ip6gre_newlink_common(), covering both ip6gre_tunnel_init() and ip6gre_tap_init()/
Note that ip6gre_tunnel_init_common() is not called from ip6erspan_tap_init() thus we also need to add a dev_hold() there, as ip6erspan_tunnel_uninit() does call dev_put()
[1] refcount_t: decrement hit 0; leaking memory. WARNING: CPU: 0 PID: 8422 at lib/refcount.c:31 refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31 Modules linked in: CPU: 1 PID: 8422 Comm: syz-executor854 Not tainted 5.12.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31 Code: 1d 6a 5a e8 09 31 ff 89 de e8 8d 1a ab fd 84 db 75 e0 e8 d4 13 ab fd 48 c7 c7 a0 e1 c1 89 c6 05 4a 5a e8 09 01 e8 2e 36 fb 04 <0f> 0b eb c4 e8 b8 13 ab fd 0f b6 1d 39 5a e8 09 31 ff 89 de e8 58 RSP: 0018:ffffc900018befd0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88801ef19c40 RSI: ffffffff815c51f5 RDI: fffff52000317dec RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815bdf8e R11: 0000000000000000 R12: ffff888018cf4568 R13: ffff888018cf4c00 R14: ffff8880228f2000 R15: ffffffff8d659b80 FS: 00000000014eb300(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055d7bf2b3138 CR3: 0000000014933000 CR4: 00000000001506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __refcount_dec include/linux/refcount.h:344 [inline] refcount_dec include/linux/refcount.h:359 [inline] dev_put include/linux/netdevice.h:4135 [inline] ip6gre_tunnel_uninit+0x3d7/0x440 net/ipv6/ip6_gre.c:420 register_netdevice+0xadf/0x1500 net/core/dev.c:10308 ip6gre_newlink_common.constprop.0+0x158/0x410 net/ipv6/ip6_gre.c:1984 ip6gre_newlink+0x275/0x7a0 net/ipv6/ip6_gre.c:2017 __rtnl_newlink+0x1062/0x1710 net/core/rtnetlink.c:3443 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3491 rtnetlink_rcv_msg+0x44e/0xad0 net/core/rtnetlink.c:5553 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2502 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:674 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
Fixes: 919067cc845f ("net: add CONFIG_PCPU_DEV_REFCNT") Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv6/ip6_gre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 86c8ea7d70067..6cbc6e979bed7 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1503,6 +1503,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) } ip6gre_tnl_init_features(dev);
+ dev_hold(dev); return 0;
cleanup_dst_cache_init: @@ -1896,6 +1897,7 @@ static int ip6erspan_tap_init(struct net_device *dev) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip6erspan_tnl_link_config(tunnel, 1);
+ dev_hold(dev); return 0;
cleanup_dst_cache_init: @@ -2001,8 +2003,6 @@ static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev, if (tb[IFLA_MTU]) ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
- dev_hold(dev); - out: return err; }
From: Eric Dumazet edumazet@google.com
stable inclusion from linux-4.19.191 commit 01cc9ab6fdf1ddb0a5355196557b95819788f9bb
--------------------------------
commit 6289a98f0817a4a457750d6345e754838eae9439 upstream.
After adopting CONFIG_PCPU_DEV_REFCNT=n option, syzbot was able to trigger a warning [1]
Issue here is that:
- all dev_put() should be paired with a corresponding prior dev_hold().
- A driver doing a dev_put() in its ndo_uninit() MUST also do a dev_hold() in its ndo_init(), only when ndo_init() is returning 0.
Otherwise, register_netdevice() would call ndo_uninit() in its error path and release a refcount too soon.
Fixes: 919067cc845f ("net: add CONFIG_PCPU_DEV_REFCNT") Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv6/sit.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index bcf29201f87b3..a629ec46ba3a1 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -215,8 +215,6 @@ static int ipip6_tunnel_create(struct net_device *dev)
ipip6_tunnel_clone_6rd(dev, sitn);
- dev_hold(dev); - ipip6_tunnel_link(sitn, t); return 0;
@@ -1407,7 +1405,7 @@ static int ipip6_tunnel_init(struct net_device *dev) dev->tstats = NULL; return err; } - + dev_hold(dev); return 0; }
From: Eric Dumazet edumazet@google.com
stable inclusion from linux-4.19.191 commit 873d5de7f7477a5c7258a9dc3967053f466257a2
--------------------------------
commit 48bb5697269a7cbe5194dbb044dc38c517e34c58 upstream.
Same reasons than for the previous commits : 6289a98f0817 ("sit: proper dev_{hold|put} in ndo_[un]init methods") 40cb881b5aaa ("ip6_vti: proper dev_{hold|put} in ndo_[un]init methods") 7f700334be9a ("ip6_gre: proper dev_{hold|put} in ndo_[un]init methods")
After adopting CONFIG_PCPU_DEV_REFCNT=n option, syzbot was able to trigger a warning [1]
Issue here is that:
- all dev_put() should be paired with a corresponding prior dev_hold().
- A driver doing a dev_put() in its ndo_uninit() MUST also do a dev_hold() in its ndo_init(), only when ndo_init() is returning 0.
Otherwise, register_netdevice() would call ndo_uninit() in its error path and release a refcount too soon.
[1] WARNING: CPU: 1 PID: 21059 at lib/refcount.c:31 refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31 Modules linked in: CPU: 1 PID: 21059 Comm: syz-executor.4 Not tainted 5.12.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31 Code: 1d 6a 5a e8 09 31 ff 89 de e8 8d 1a ab fd 84 db 75 e0 e8 d4 13 ab fd 48 c7 c7 a0 e1 c1 89 c6 05 4a 5a e8 09 01 e8 2e 36 fb 04 <0f> 0b eb c4 e8 b8 13 ab fd 0f b6 1d 39 5a e8 09 31 ff 89 de e8 58 RSP: 0018:ffffc900025aefe8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000040000 RSI: ffffffff815c51f5 RDI: fffff520004b5def RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815bdf8e R11: 0000000000000000 R12: ffff888023488568 R13: ffff8880254e9000 R14: 00000000dfd82cfd R15: ffff88802ee2d7c0 FS: 00007f13bc590700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f0943e74000 CR3: 0000000025273000 CR4: 00000000001506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __refcount_dec include/linux/refcount.h:344 [inline] refcount_dec include/linux/refcount.h:359 [inline] dev_put include/linux/netdevice.h:4135 [inline] ip6_tnl_dev_uninit+0x370/0x3d0 net/ipv6/ip6_tunnel.c:387 register_netdevice+0xadf/0x1500 net/core/dev.c:10308 ip6_tnl_create2+0x1b5/0x400 net/ipv6/ip6_tunnel.c:263 ip6_tnl_newlink+0x312/0x580 net/ipv6/ip6_tunnel.c:2052 __rtnl_newlink+0x1062/0x1710 net/core/rtnetlink.c:3443 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3491 rtnetlink_rcv_msg+0x44e/0xad0 net/core/rtnetlink.c:5553 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2502 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:674 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae
Fixes: 919067cc845f ("net: add CONFIG_PCPU_DEV_REFCNT") Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 28c4a693ab98d..45c4c30d9806d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -272,7 +272,6 @@ static int ip6_tnl_create2(struct net_device *dev)
strcpy(t->parms.name, dev->name);
- dev_hold(dev); ip6_tnl_link(ip6n, t); return 0;
@@ -1866,6 +1865,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
+ dev_hold(dev); return 0;
destroy_dst:
From: Eric Dumazet edumazet@google.com
stable inclusion from linux-4.19.191 commit 867fd8d5613e0632712bdc15167d8ce9eb2c0f8b
--------------------------------
commit 0d7a7b2014b1a499a0fe24c9f3063d7856b5aaaf upstream.
My previous commits added a dev_hold() in tunnels ndo_init(), but forgot to remove it from special functions setting up fallback tunnels.
Fallback tunnels do call their respective ndo_init()
This leads to various reports like :
unregister_netdevice: waiting for ip6gre0 to become free. Usage count = 2
Fixes: 48bb5697269a ("ip6_tunnel: sit: proper dev_{hold|put} in ndo_[un]init methods") Fixes: 6289a98f0817 ("sit: proper dev_{hold|put} in ndo_[un]init methods") Fixes: 40cb881b5aaa ("ip6_vti: proper dev_{hold|put} in ndo_[un]init methods") Fixes: 7f700334be9a ("ip6_gre: proper dev_{hold|put} in ndo_[un]init methods") Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv6/ip6_gre.c | 3 --- net/ipv6/ip6_tunnel.c | 1 - net/ipv6/ip6_vti.c | 1 - net/ipv6/sit.c | 1 - 4 files changed, 6 deletions(-)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6cbc6e979bed7..043e57d08a3e9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -392,7 +392,6 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, if (!(nt->parms.o_flags & TUNNEL_SEQ)) dev->features |= NETIF_F_LLTX;
- dev_hold(dev); ip6gre_tunnel_link(ign, nt); return nt;
@@ -1546,8 +1545,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) strcpy(tunnel->parms.name, dev->name);
tunnel->hlen = sizeof(struct ipv6hdr) + 4; - - dev_hold(dev); }
static struct inet6_protocol ip6gre_protocol __read_mostly = { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 45c4c30d9806d..35c127c3eee78 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1909,7 +1909,6 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
t->parms.proto = IPPROTO_IPV6; - dev_hold(dev);
rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index defa04b38ee8d..290badfe70e06 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -956,7 +956,6 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev) struct vti6_net *ip6n = net_generic(net, vti6_net_id);
t->parms.proto = IPPROTO_IPV6; - dev_hold(dev);
rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a629ec46ba3a1..bcd690ccb551e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1421,7 +1421,6 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; iph->ttl = 64;
- dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); }
From: Dan Carpenter dan.carpenter@oracle.com
stable inclusion from linux-4.19.192 commit dcbce0b6f38476b380a0dec28e237d9398ca2bfe
--------------------------------
[ Upstream commit d9cd78edb2e6b7e26747c0ec312be31e7ef196fe ]
How the type promotion works in ternary expressions is a bit tricky. The problem is that scpi_clk_get_val() returns longs, "ret" is a int which holds a negative error code, and le32_to_cpu() is an unsigned int. We want the negative error code to be cast to a negative long. But because le32_to_cpu() is an u32 then "ret" is type promoted to u32 and becomes a high positive and then it is promoted to long and it is still a high positive value.
Fix this by getting rid of the ternary.
Link: https://lore.kernel.org/r/YIE7pdqV/h10tEAK@mwanda Fixes: 8cb7cf56c9fe ("firmware: add support for ARM System Control and Power Interface(SCPI) protocol") Reviewed-by: Cristian Marussi cristian.marussi@arm.com Signed-off-by: Dan Carpenter dan.carpenter@oracle.com [sudeep.holla: changed to return 0 as clock rate on error] Signed-off-by: Sudeep Holla sudeep.holla@arm.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/firmware/arm_scpi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c index c7d06a36b23a5..baa7280eccb34 100644 --- a/drivers/firmware/arm_scpi.c +++ b/drivers/firmware/arm_scpi.c @@ -563,8 +563,10 @@ static unsigned long scpi_clk_get_val(u16 clk_id)
ret = scpi_send_message(CMD_GET_CLOCK_VALUE, &le_clk_id, sizeof(le_clk_id), &rate, sizeof(rate)); + if (ret) + return 0;
- return ret ? ret : le32_to_cpu(rate); + return le32_to_cpu(rate); }
static int scpi_clk_set_val(u16 clk_id, unsigned long rate)
From: Oleg Nesterov oleg@redhat.com
stable inclusion from linux-4.19.192 commit 365e5534cf89cff7def13f94b18f26ae4670e0cd
--------------------------------
[ Upstream commit dbb5afad100a828c97e012c6106566d99f041db6 ]
Suppose we have 2 threads, the group-leader L and a sub-theread T, both parked in ptrace_stop(). Debugger tries to resume both threads and does
ptrace(PTRACE_CONT, T); ptrace(PTRACE_CONT, L);
If the sub-thread T execs in between, the 2nd PTRACE_CONT doesn not resume the old leader L, it resumes the post-exec thread T which was actually now stopped in PTHREAD_EVENT_EXEC. In this case the PTHREAD_EVENT_EXEC event is lost, and the tracer can't know that the tracee changed its pid.
This patch makes ptrace() fail in this case until debugger does wait() and consumes PTHREAD_EVENT_EXEC which reports old_pid. This affects all ptrace requests except the "asynchronous" PTRACE_INTERRUPT/KILL.
The patch doesn't add the new PTRACE_ option to not complicate the API, and I _hope_ this won't cause any noticeable regression:
- If debugger uses PTRACE_O_TRACEEXEC and the thread did an exec and the tracer does a ptrace request without having consumed the exec event, it's 100% sure that the thread the ptracer thinks it is targeting does not exist anymore, or isn't the same as the one it thinks it is targeting.
- To some degree this patch adds nothing new. In the scenario above ptrace(L) can fail with -ESRCH if it is called after the execing sub-thread wakes the leader up and before it "steals" the leader's pid.
Test-case:
#include <stdio.h> #include <unistd.h> #include <signal.h> #include <sys/ptrace.h> #include <sys/wait.h> #include <errno.h> #include <pthread.h> #include <assert.h>
void *tf(void *arg) { execve("/usr/bin/true", NULL, NULL); assert(0);
return NULL; }
int main(void) { int leader = fork(); if (!leader) { kill(getpid(), SIGSTOP);
pthread_t th; pthread_create(&th, NULL, tf, NULL); for (;;) pause();
return 0; }
waitpid(leader, NULL, WSTOPPED);
ptrace(PTRACE_SEIZE, leader, 0, PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC); waitpid(leader, NULL, 0);
ptrace(PTRACE_CONT, leader, 0,0); waitpid(leader, NULL, 0);
int status, thread = waitpid(-1, &status, 0); assert(thread > 0 && thread != leader); assert(status == 0x80137f);
ptrace(PTRACE_CONT, thread, 0,0); /* * waitid() because waitpid(leader, &status, WNOWAIT) does not * report status. Why ???? * * Why WEXITED? because we have another kernel problem connected * to mt-exec. */ siginfo_t info; assert(waitid(P_PID, leader, &info, WSTOPPED|WEXITED|WNOWAIT) == 0); assert(info.si_pid == leader && info.si_status == 0x0405);
/* OK, it sleeps in ptrace(PTRACE_EVENT_EXEC == 0x04) */ assert(ptrace(PTRACE_CONT, leader, 0,0) == -1); assert(errno == ESRCH);
assert(leader == waitpid(leader, &status, WNOHANG)); assert(status == 0x04057f);
assert(ptrace(PTRACE_CONT, leader, 0,0) == 0);
return 0; }
Signed-off-by: Oleg Nesterov oleg@redhat.com Reported-by: Simon Marchi simon.marchi@efficios.com Acked-by: "Eric W. Biederman" ebiederm@xmission.com Acked-by: Pedro Alves palves@redhat.com Acked-by: Simon Marchi simon.marchi@efficios.com Acked-by: Jan Kratochvil jan.kratochvil@redhat.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/ptrace.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 47bb688dd87f4..f32095d7aa5e4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -163,6 +163,21 @@ void __ptrace_unlink(struct task_struct *child) spin_unlock(&child->sighand->siglock); }
+static bool looks_like_a_spurious_pid(struct task_struct *task) +{ + if (task->exit_code != ((PTRACE_EVENT_EXEC << 8) | SIGTRAP)) + return false; + + if (task_pid_vnr(task) == task->ptrace_message) + return false; + /* + * The tracee changed its pid but the PTRACE_EVENT_EXEC event + * was not wait()'ed, most probably debugger targets the old + * leader which was destroyed in de_thread(). + */ + return true; +} + /* Ensure that nothing can wake it up, even SIGKILL */ static bool ptrace_freeze_traced(struct task_struct *task) { @@ -173,7 +188,8 @@ static bool ptrace_freeze_traced(struct task_struct *task) return ret;
spin_lock_irq(&task->sighand->siglock); - if (task_is_traced(task) && !__fatal_signal_pending(task)) { + if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && + !__fatal_signal_pending(task)) { task->state = __TASK_TRACED; ret = true; }
From: Daniel Wagner dwagner@suse.de
stable inclusion from linux-4.19.192 commit 6671cd54fd21fe8c31fa5c4b28d3c7e96945cd35
--------------------------------
[ Upstream commit 85428beac80dbcace5b146b218697c73e367dcf5 ]
Reset the ns->file value to NULL also in the error case in nvmet_file_ns_enable().
The ns->file variable points either to file object or contains the error code after the filp_open() call. This can lead to following problem:
When the user first setups an invalid file backend and tries to enable the ns, it will fail. Then the user switches over to a bdev backend and enables successfully the ns. The first received I/O will crash the system because the IO backend is chosen based on the ns->file value:
static u16 nvmet_parse_io_cmd(struct nvmet_req *req) { [...]
if (req->ns->file) return nvmet_file_parse_io_cmd(req);
return nvmet_bdev_parse_io_cmd(req); }
Reported-by: Enzo Matsumiya ematsumiya@suse.com Signed-off-by: Daniel Wagner dwagner@suse.de Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/nvme/target/io-cmd-file.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 39d972e2595f0..ad6263cf7303c 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -38,9 +38,11 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
ns->file = filp_open(ns->device_path, flags, 0); if (IS_ERR(ns->file)) { - pr_err("failed to open file %s: (%ld)\n", - ns->device_path, PTR_ERR(ns->file)); - return PTR_ERR(ns->file); + ret = PTR_ERR(ns->file); + pr_err("failed to open file %s: (%d)\n", + ns->device_path, ret); + ns->file = NULL; + return ret; }
ret = vfs_getattr(&ns->file->f_path,
From: Zqiang qiang.zhang@windriver.com
stable inclusion from linux-4.19.192 commit b31395e0d19017aaa83ee0e2dc0862954aca3aa3
--------------------------------
[ Upstream commit 3a010c493271f04578b133de977e0e5dd2848cea ]
When a interruptible mutex locker is interrupted by a signal without acquiring this lock and removed from the wait queue. if the mutex isn't contended enough to have a waiter put into the wait queue again, the setting of the WAITER bit will force mutex locker to go into the slowpath to acquire the lock every time, so if the wait queue is empty, the WAITER bit need to be clear.
Fixes: 040a0a371005 ("mutex: Add support for wound/wait style locks") Suggested-by: Peter Zijlstra peterz@infradead.org Signed-off-by: Zqiang qiang.zhang@windriver.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Link: https://lkml.kernel.org/r/20210517034005.30828-1-qiang.zhang@windriver.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/locking/mutex-debug.c | 4 ++-- kernel/locking/mutex-debug.h | 2 +- kernel/locking/mutex.c | 18 +++++++++++++----- kernel/locking/mutex.h | 4 +--- 4 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 9aa713629387c..839df4383799c 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -57,7 +57,7 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, task->blocked_on = waiter; }
-void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, +void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task) { DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); @@ -65,7 +65,7 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter); task->blocked_on = NULL;
- list_del_init(&waiter->list); + INIT_LIST_HEAD(&waiter->list); waiter->task = NULL; }
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h index 1edd3f45a4ecb..53e631e1d76da 100644 --- a/kernel/locking/mutex-debug.h +++ b/kernel/locking/mutex-debug.h @@ -22,7 +22,7 @@ extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); extern void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task); -extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, +extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task); extern void debug_mutex_unlock(struct mutex *lock); extern void debug_mutex_init(struct mutex *lock, const char *name, diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index b3da782cdfbd7..354151fef06ae 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -177,7 +177,7 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait * Add @waiter to a given location in the lock wait_list and set the * FLAG_WAITERS flag if it's the first waiter. */ -static void __sched +static void __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct list_head *list) { @@ -188,6 +188,16 @@ __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); }
+static void +__mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter) +{ + list_del(&waiter->list); + if (likely(list_empty(&lock->wait_list))) + __mutex_clear_flag(lock, MUTEX_FLAGS); + + debug_mutex_remove_waiter(lock, waiter, current); +} + /* * Give up ownership to a specific task, when @task = NULL, this is equivalent * to a regular unlock. Sets PICKUP on a handoff, clears HANDOF, preserves @@ -1040,9 +1050,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, __ww_mutex_check_waiters(lock, ww_ctx); }
- mutex_remove_waiter(lock, &waiter, current); - if (likely(list_empty(&lock->wait_list))) - __mutex_clear_flag(lock, MUTEX_FLAGS); + __mutex_remove_waiter(lock, &waiter);
debug_mutex_free_waiter(&waiter);
@@ -1059,7 +1067,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
err: __set_current_state(TASK_RUNNING); - mutex_remove_waiter(lock, &waiter, current); + __mutex_remove_waiter(lock, &waiter); err_early_kill: spin_unlock(&lock->wait_lock); debug_mutex_free_waiter(&waiter); diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 1c2287d3fa719..f0c710b1d1927 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -10,12 +10,10 @@ * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: */
-#define mutex_remove_waiter(lock, waiter, task) \ - __list_del((waiter)->list.prev, (waiter)->list.next) - #define debug_mutex_wake_waiter(lock, waiter) do { } while (0) #define debug_mutex_free_waiter(waiter) do { } while (0) #define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) +#define debug_mutex_remove_waiter(lock, waiter, ti) do { } while (0) #define debug_mutex_unlock(lock) do { } while (0) #define debug_mutex_init(lock, name, key) do { } while (0)
From: Ronnie Sahlberg lsahlber@redhat.com
stable inclusion from linux-4.19.192 commit 4e95d8160aa7eaaa185b8c1d16294d9e79444aeb
--------------------------------
commit d201d7631ca170b038e7f8921120d05eec70d7c5 upstream.
When using smb2_copychunk_range() for large ranges we will run through several iterations of a loop calling SMB2_ioctl() but never actually free the returned buffer except for the final iteration. This leads to memory leaks everytime a large copychunk is requested.
Fixes: 9bf0c9cd4314 ("CIFS: Fix SMB2/SMB3 Copy offload support (refcopy) for large files") Cc: stable@vger.kernel.org Reviewed-by: Aurelien Aptel aaptel@suse.com Signed-off-by: Ronnie Sahlberg lsahlber@redhat.com Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b8d01c1234a47..8aa3ab7bb8029 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1173,6 +1173,8 @@ smb2_copychunk_range(const unsigned int xid, cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
/* Request server copy to target from src identified by key */ + kfree(retbuf); + retbuf = NULL; rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, true /* is_fsctl */, (char *)pcchunk,
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit 14b3bb3da626129da01f6db48e13b9220b7913ed
--------------------------------
commit 94f88309f201821073f57ae6005caefa61bf7b7e upstream.
This reverts commit dcd0feac9bab901d5739de51b3f69840851f8919.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change.
The original commit message for this change was incorrect as the code path can never result in a NULL dereference, alluding to the fact that whatever tool was used to "find this" is broken. It's just an optional resource reservation, so removing this check is fine.
Cc: Kangjie Lu kjlu@umn.edu Acked-by: Takashi Iwai tiwai@suse.de Fixes: dcd0feac9bab ("ALSA: sb8: add a check for request_region") Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20210503115736.2104747-35-gregkh@linuxfoundation.o... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- sound/isa/sb/sb8.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c index 1eb8b61a185be..d77dcba276b54 100644 --- a/sound/isa/sb/sb8.c +++ b/sound/isa/sb/sb8.c @@ -111,10 +111,6 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev)
/* block the 0x388 port to avoid PnP conflicts */ acard->fm_res = request_region(0x388, 4, "SoundBlaster FM"); - if (!acard->fm_res) { - err = -EBUSY; - goto _err; - }
if (port[dev] != SNDRV_AUTO_PORT) { if ((err = snd_sbdsp_create(card, port[dev], irq[dev],
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit 15d3547758b369db1690adda381a360a0e821bed
--------------------------------
commit 5e68b86c7b7c059c0f0ec4bf8adabe63f84a61eb upstream.
This reverts commit 23015b22e47c5409620b1726a677d69e5cd032ba.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change.
The original commit has a memory leak on the error path here, it does not clean up everything properly.
Cc: Kangjie Lu kjlu@umn.edu Cc: Alexandre Bounine alex.bou9@gmail.com Cc: Matt Porter mporter@kernel.crashing.org Cc: Andrew Morton akpm@linux-foundation.org Cc: Linus Torvalds torvalds@linux-foundation.org Fixes: 23015b22e47c ("rapidio: fix a NULL pointer dereference when create_workqueue() fails") Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20210503115736.2104747-45-gregkh@linuxfoundation.o... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/rapidio/rio_cm.c | 8 -------- 1 file changed, 8 deletions(-)
diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index ef989a15aefc4..bad0e0ea4f305 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -2145,14 +2145,6 @@ static int riocm_add_mport(struct device *dev, mutex_init(&cm->rx_lock); riocm_rx_fill(cm, RIOCM_RX_RING_SIZE); cm->rx_wq = create_workqueue(DRV_NAME "/rxq"); - if (!cm->rx_wq) { - riocm_error("failed to allocate IBMBOX_%d on %s", - cmbox, mport->name); - rio_release_outb_mbox(mport, cmbox); - kfree(cm); - return -ENOMEM; - } - INIT_WORK(&cm->rx_work, rio_ibmsg_handler);
cm->tx_slot = 0;
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit c5fc7a18c0616f468b30430ce34498d51481bfac
--------------------------------
commit 754f39158441f4c0d7a8255209dd9a939f08ce80 upstream.
This reverts commit 32f47179833b63de72427131169809065db6745e.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, this commit was found to be not be needed at all as the change was useless because this function can only be called when of_match_device matched on something. So it should be reverted.
Cc: Aditya Pakki pakki001@umn.edu Cc: stable stable@vger.kernel.org Fixes: 32f47179833b ("serial: mvebu-uart: Fix to avoid a potential NULL pointer dereference") Acked-by: Jiri Slaby jirislaby@kernel.org Link: https://lore.kernel.org/r/20210503115736.2104747-6-gregkh@linuxfoundation.or... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/tty/serial/mvebu-uart.c | 3 --- 1 file changed, 3 deletions(-)
diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 7d26c9b57d8ed..170e446a2f625 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -799,9 +799,6 @@ static int mvebu_uart_probe(struct platform_device *pdev) return -EINVAL; }
- if (!match) - return -ENODEV; - /* Assume that all UART ports have a DT alias or none has */ id = of_alias_get_id(pdev->dev.of_node, "serial"); if (!pdev->dev.of_node || id < 0)
From: Mikulas Patocka mpatocka@redhat.com
stable inclusion from linux-4.19.192 commit 1ff004c41c8205d7677f7b9d7da238a5d9a29274
--------------------------------
commit c699a0db2d62e3bbb7f0bf35c87edbc8d23e3062 upstream.
The following commands will crash the kernel:
modprobe brd rd_size=1048576 dmsetup create o --table "0 `blockdev --getsize /dev/ram0` snapshot-origin /dev/ram0" dmsetup create s --table "0 `blockdev --getsize /dev/ram0` snapshot /dev/ram0 /dev/ram1 N 0"
The reason is that when we test for zero chunk size, we jump to the label bad_read_metadata without setting the "r" variable. The function snapshot_ctr destroys all the structures and then exits with "r == 0". The kernel then crashes because it falsely believes that snapshot_ctr succeeded.
In order to fix the bug, we set the variable "r" to -EINVAL.
Signed-off-by: Mikulas Patocka mpatocka@redhat.com Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/dm-snap.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 9e930a150aa29..7d96836294713 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1286,6 +1286,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (!s->store->chunk_size) { ti->error = "Chunk size not set"; + r = -EINVAL; goto bad_read_metadata; }
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit 84c19f6a5cbb0c4bc96fbb7b29acdba918e94992
--------------------------------
commit 58c0cc2d90f1e37c4eb63ae7f164c83830833f78 upstream.
This reverts commit ec7f6aad57ad29e4e66cc2e18e1e1599ddb02542.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change.
This patch "looks" correct, but the driver keeps on running and will fail horribly right afterward if this error condition ever trips.
So points for trying to resolve an issue, but a huge NEGATIVE value for providing a "fake" fix for the problem as nothing actually got resolved at all. I'll go fix this up properly...
Cc: Kangjie Lu kjlu@umn.edu Cc: Aditya Pakki pakki001@umn.edu Cc: Ferenc Bakonyi fero@drama.obuda.kando.hu Cc: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com Fixes: ec7f6aad57ad ("video: hgafb: fix potential NULL pointer dereference") Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20210503115736.2104747-39-gregkh@linuxfoundation.o... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/video/fbdev/hgafb.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/drivers/video/fbdev/hgafb.c b/drivers/video/fbdev/hgafb.c index 59e1cae579481..4630285431736 100644 --- a/drivers/video/fbdev/hgafb.c +++ b/drivers/video/fbdev/hgafb.c @@ -285,8 +285,6 @@ static int hga_card_detect(void) hga_vram_len = 0x08000;
hga_vram = ioremap(0xb0000, hga_vram_len); - if (!hga_vram) - goto error;
if (request_region(0x3b0, 12, "hgafb")) release_io_ports = 1;
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit 84a7ffe7a4a3a742180109e273d2e400ed0c1ace
--------------------------------
commit bee1b0511844c8c79fccf1f2b13472393b6b91f7 upstream.
This reverts commit f86a3b83833e7cfe558ca4d70b64ebc48903efec.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change.
The original commit causes a memory leak when it is trying to claim it is properly handling errors. Revert this change and fix it up properly in a follow-on commit.
Cc: Kangjie Lu kjlu@umn.edu Cc: David S. Miller davem@davemloft.net Fixes: f86a3b83833e ("net: stmicro: fix a missing check of clk_prepare") Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20210503115736.2104747-21-gregkh@linuxfoundation.o... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index fc1fa0f9f3387..ee5c0c6263516 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -59,9 +59,7 @@ static int sun7i_gmac_init(struct platform_device *pdev, void *priv) gmac->clk_enabled = 1; } else { clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE); - ret = clk_prepare(gmac->tx_clk); - if (ret) - return ret; + clk_prepare(gmac->tx_clk); }
return 0;
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit f7bce372a9e415990c6b9852febc7fb0ffc93729
--------------------------------
commit 8d1beda5f11953ffe135a5213287f0b25b4da41b upstream.
This reverts commit 248b57015f35c94d4eae2fdd8c6febf5cd703900.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change.
The original commit does not properly unwind if there is an error condition so it needs to be reverted at this point in time.
Cc: Kangjie Lu kjlu@umn.edu Cc: Jacek Anaszewski jacek.anaszewski@gmail.com Cc: stable stable@vger.kernel.org Fixes: 248b57015f35 ("leds: lp5523: fix a missing check of return value of lp55xx_read") Link: https://lore.kernel.org/r/20210503115736.2104747-9-gregkh@linuxfoundation.or... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/leds/leds-lp5523.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c index fd64df5a57a5e..a2e74feee2b2f 100644 --- a/drivers/leds/leds-lp5523.c +++ b/drivers/leds/leds-lp5523.c @@ -318,9 +318,7 @@ static int lp5523_init_program_engine(struct lp55xx_chip *chip)
/* Let the programs run for couple of ms and check the engine status */ usleep_range(3000, 6000); - ret = lp55xx_read(chip, LP5523_REG_STATUS, &status); - if (ret) - return ret; + lp55xx_read(chip, LP5523_REG_STATUS, &status); status &= LP5523_ENG_STATUS_MASK;
if (status != LP5523_ENG_STATUS_MASK) {
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit ec19a4fba56a0ca3143f2ac192764f769f88453a
--------------------------------
commit 99ae3417672a6d4a3bf68d4fc43d7c6ca074d477 upstream.
This reverts commit 9aa3aa15f4c2f74f47afd6c5db4b420fadf3f315.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, it was determined that this commit is not needed at all so just revert it. Also, the call to lm80_init_client() was not properly handled, so if error handling is needed in the lm80_probe() function, then it should be done properly, not half-baked like the commit being reverted here did.
Cc: Kangjie Lu kjlu@umn.edu Fixes: 9aa3aa15f4c2 ("hwmon: (lm80) fix a missing check of bus read in lm80 probe") Cc: stable stable@vger.kernel.org Acked-by: Guenter Roeck linux@roeck-us.net Link: https://lore.kernel.org/r/20210503115736.2104747-5-gregkh@linuxfoundation.or... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hwmon/lm80.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c index f9b8e3e23a8e8..dc2bd82b32021 100644 --- a/drivers/hwmon/lm80.c +++ b/drivers/hwmon/lm80.c @@ -630,7 +630,6 @@ static int lm80_probe(struct i2c_client *client, struct device *dev = &client->dev; struct device *hwmon_dev; struct lm80_data *data; - int rv;
data = devm_kzalloc(dev, sizeof(struct lm80_data), GFP_KERNEL); if (!data) @@ -643,14 +642,8 @@ static int lm80_probe(struct i2c_client *client, lm80_init_client(client);
/* A few vars need to be filled upon startup */ - rv = lm80_read_value(client, LM80_REG_FAN_MIN(1)); - if (rv < 0) - return rv; - data->fan[f_min][0] = rv; - rv = lm80_read_value(client, LM80_REG_FAN_MIN(2)); - if (rv < 0) - return rv; - data->fan[f_min][1] = rv; + data->fan[f_min][0] = lm80_read_value(client, LM80_REG_FAN_MIN(1)); + data->fan[f_min][1] = lm80_read_value(client, LM80_REG_FAN_MIN(2));
hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, data, lm80_groups);
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit 94deabc3da468888b9abd8d7f4df3e7d1a43e497
--------------------------------
commit ed04fe8a0e87d7b5ea17d47f4ac9ec962b24814a upstream.
This reverts commit 1d84353d205a953e2381044953b7fa31c8c9702d.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change.
The original commit here, while technically correct, did not fully handle all of the reported issues that the commit stated it was fixing, so revert it until it can be "fixed" fully.
Note, ioremap() probably will never fail for old hardware like this, and if anyone actually used this hardware (a PowerMac era PCI display card), they would not be using fbdev anymore.
Cc: Kangjie Lu kjlu@umn.edu Cc: Aditya Pakki pakki001@umn.edu Cc: Finn Thain fthain@telegraphics.com.au Cc: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com Reviewed-by: Rob Herring robh@kernel.org Fixes: 1d84353d205a ("video: imsttfb: fix potential NULL pointer dereferences") Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20210503115736.2104747-67-gregkh@linuxfoundation.o... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/video/fbdev/imsttfb.c | 5 ----- 1 file changed, 5 deletions(-)
diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c index ffcf553719a31..ecdcf358ad5ea 100644 --- a/drivers/video/fbdev/imsttfb.c +++ b/drivers/video/fbdev/imsttfb.c @@ -1516,11 +1516,6 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) info->fix.smem_start = addr; info->screen_base = (__u8 *)ioremap(addr, par->ramdac == IBM ? 0x400000 : 0x800000); - if (!info->screen_base) { - release_mem_region(addr, size); - framebuffer_release(info); - return -ENOMEM; - } info->fix.mmio_start = addr + 0x800000; par->dc_regs = ioremap(addr + 0x800000, 0x1000); par->cmap_regs_phys = addr + 0x840000;
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit 12b6934b22083a9ab30db104d81c49e43a5ab1c8
--------------------------------
commit 4d427b408c4c2ff1676966c72119a3a559f8e39b upstream.
This reverts commit 63a06181d7ce169d09843645c50fea1901bc9f0a.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change.
The original commit is incorrect, it does not properly clean up on the error path, so I'll keep the revert and fix it up properly with a follow-on patch.
Cc: Kangjie Lu kjlu@umn.edu Cc: Avri Altman avri.altman@wdc.com Cc: Martin K. Petersen martin.petersen@oracle.com Fixes: 63a06181d7ce ("scsi: ufs: fix a missing check of devm_reset_control_get") Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20210503115736.2104747-31-gregkh@linuxfoundation.o... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/scsi/ufs/ufs-hisi.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/drivers/scsi/ufs/ufs-hisi.c b/drivers/scsi/ufs/ufs-hisi.c index c2cee73a8560d..452e19f8fb470 100644 --- a/drivers/scsi/ufs/ufs-hisi.c +++ b/drivers/scsi/ufs/ufs-hisi.c @@ -544,10 +544,6 @@ static int ufs_hisi_init_common(struct ufs_hba *hba) ufshcd_set_variant(hba, host);
host->rst = devm_reset_control_get(dev, "rst"); - if (IS_ERR(host->rst)) { - dev_err(dev, "%s: failed to get reset control\n", __func__); - return PTR_ERR(host->rst); - }
ufs_hisi_set_pm_lvl(hba);
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit 69d17230341a313091ad10713acd2aa33acfc3b7
--------------------------------
commit 257343d3ed557f11d580d0b7c515dc154f64a42b upstream.
This reverts commit 093c48213ee37c3c3ff1cf5ac1aa2a9d8bc66017.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change.
Because of this, all submissions from this group must be reverted from the kernel tree and will need to be re-reviewed again to determine if they actually are a valid fix. Until that work is complete, remove this change to ensure that no problems are being introduced into the codebase.
Cc: Wenwen Wang wang6495@umn.edu Cc: Peter Rosin peda@axentia.se Cc: Jens Axboe axboe@kernel.dk Fixes: 093c48213ee3 ("gdrom: fix a memory leak bug") Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20210503115736.2104747-27-gregkh@linuxfoundation.o... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cdrom/gdrom.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 72cd96a8eb19d..ae3a7537cf0fb 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -889,7 +889,6 @@ static void __exit exit_gdrom(void) platform_device_unregister(pd); platform_driver_unregister(&gdrom_driver); kfree(gd.toc); - kfree(gd.cd_info); }
module_init(init_gdrom);
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit 4d08695b76ba20eff037ccb32cf3945f46498185
--------------------------------
commit 3e465fc3846734e9489273d889f19cc17b4cf4bd upstream.
This reverts commit d39083234c60519724c6ed59509a2129fd2aed41.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, it was determined that this commit is not needed at all as the media core already prevents memory disclosure on this codepath, so just drop the extra memset happening here.
Cc: Kangjie Lu kjlu@umn.edu Cc: Geert Uytterhoeven geert+renesas@glider.be Cc: Hans Verkuil hverkuil-cisco@xs4all.nl Cc: Mauro Carvalho Chehab mchehab@kernel.org Fixes: d39083234c60 ("media: rcar_drif: fix a memory disclosure") Cc: stable stable@vger.kernel.org Reviewed-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Reviewed-by: Fabrizio Castro fabrizio.castro.jz@renesas.com Link: https://lore.kernel.org/r/20210503115736.2104747-4-gregkh@linuxfoundation.or... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/media/platform/rcar_drif.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/media/platform/rcar_drif.c b/drivers/media/platform/rcar_drif.c index b677d014e7bab..81413ab52475d 100644 --- a/drivers/media/platform/rcar_drif.c +++ b/drivers/media/platform/rcar_drif.c @@ -912,7 +912,6 @@ static int rcar_drif_g_fmt_sdr_cap(struct file *file, void *priv, { struct rcar_drif_sdr *sdr = video_drvdata(file);
- memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved)); f->fmt.sdr.pixelformat = sdr->fmt->pixelformat; f->fmt.sdr.buffersize = sdr->fmt->buffersize;
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit ff867789b504162c982b99d463b77d4320fe478d
--------------------------------
commit 68c5634c4a7278672a3bed00eb5646884257c413 upstream.
This reverts commit 765976285a8c8db3f0eb7f033829a899d0c2786e.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change.
This commit is not correct, it should not have used unlikely() and is not propagating the error properly to the calling function, so it should be reverted at this point in time. Also, if the check failed, the work queue was still assumed to be allocated, so further accesses would have continued to fail, meaning this patch does nothing to solve the root issues at all.
Cc: Kangjie Lu kjlu@umn.edu Cc: Kalle Valo kvalo@codeaurora.org Cc: Bryan Brattlof hello@bryanbrattlof.com Fixes: 765976285a8c ("rtlwifi: fix a potential NULL pointer dereference") Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20210503115736.2104747-13-gregkh@linuxfoundation.o... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/wireless/realtek/rtlwifi/base.c | 5 ----- 1 file changed, 5 deletions(-)
diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index a3189294ecb80..ef9b502ce576b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -469,11 +469,6 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw) /* <2> work queue */ rtlpriv->works.hw = hw; rtlpriv->works.rtl_wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name); - if (unlikely(!rtlpriv->works.rtl_wq)) { - pr_err("Failed to allocate work queue\n"); - return; - } - INIT_DELAYED_WORK(&rtlpriv->works.watchdog_wq, (void *)rtl_watchdog_wq_callback); INIT_DELAYED_WORK(&rtlpriv->works.ips_nic_off_wq,
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit 4190fc7c261cc0431483f44adedcce87320a6b8e
--------------------------------
commit b95b57dfe7a142bf2446548eb7f49340fd73e78b upstream.
This reverts commit 5bf7295fe34a5251b1d241b9736af4697b590670.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change.
This commit does not properly detect if an error happens because the logic after this loop will not detect that there was a failed allocation.
Cc: Aditya Pakki pakki001@umn.edu Cc: David S. Miller davem@davemloft.net Fixes: 5bf7295fe34a ("qlcnic: Avoid potential NULL pointer dereference") Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20210503115736.2104747-25-gregkh@linuxfoundation.o... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index a4cd6f2cfb862..3b0adda7cc9c6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1048,8 +1048,6 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode)
for (i = 0; i < QLCNIC_NUM_ILB_PKT; i++) { skb = netdev_alloc_skb(adapter->netdev, QLCNIC_ILB_PKT_SIZE); - if (!skb) - break; qlcnic_create_loopback_buff(skb->data, adapter->mac_addr); skb_put(skb, QLCNIC_ILB_PKT_SIZE); adapter->ahw->diag_cnt = 0;
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from linux-4.19.192 commit 8432db9d8a34f26030cee45e64a56a2e033d3ba6
--------------------------------
commit 7930742d6a0ff091c85b92ef4e076432d8d8cb79 upstream.
This reverts commit 26fd962bde0b15e54234fe762d86bc0349df1de4.
Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not.
Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change.
The change here was incorrect. While it is nice to check if niu_pci_eeprom_read() succeeded or not when using the data, any error that might have happened was not propagated upwards properly, causing the kernel to assume that these reads were successful, which results in invalid data in the buffer that was to contain the successfully read data.
Cc: Kangjie Lu kjlu@umn.edu Cc: Shannon Nelson shannon.lee.nelson@gmail.com Cc: David S. Miller davem@davemloft.net Fixes: 26fd962bde0b ("niu: fix missing checks of niu_pci_eeprom_read") Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20210503115736.2104747-23-gregkh@linuxfoundation.o... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/sun/niu.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index d84501441edde..9319d84bf49f0 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -8100,8 +8100,6 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) start += 3;
prop_len = niu_pci_eeprom_read(np, start + 4); - if (prop_len < 0) - return prop_len; err = niu_pci_vpd_get_propname(np, start + 5, namebuf, 64); if (err < 0) return err; @@ -8146,12 +8144,8 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) netif_printk(np, probe, KERN_DEBUG, np->dev, "VPD_SCAN: Reading in property [%s] len[%d]\n", namebuf, prop_len); - for (i = 0; i < prop_len; i++) { - err = niu_pci_eeprom_read(np, off + i); - if (err >= 0) - *prop_buf = err; - ++prop_buf; - } + for (i = 0; i < prop_len; i++) + *prop_buf++ = niu_pci_eeprom_read(np, off + i); }
start += len;
From: "Maciej W. Rozycki" macro@orcam.me.uk
stable inclusion from linux-4.19.192 commit 9a71ed8da907c36de4e96a8d78216231c0fe8df5
--------------------------------
commit d4d0ad57b3865795c4cde2fb5094c594c2e8f469 upstream.
Fix an issue with VGA console font size changes made after the initial video text mode has been changed with a user tool like `svgatextmode' calling the VT_RESIZEX ioctl. As it stands in that case the original screen geometry continues being used to validate further VT resizing.
Consequently when the video adapter is firstly reprogrammed from the original say 80x25 text mode using a 9x16 character cell (720x400 pixel resolution) to say 80x37 text mode and the same character cell (720x592 pixel resolution), and secondly the CRTC character cell updated to 9x8 (by loading a suitable font with the KD_FONT_OP_SET request of the KDFONTOP ioctl), the VT geometry does not get further updated from 80x37 and only upper half of the screen is used for the VT, with the lower half showing rubbish corresponding to whatever happens to be there in the video memory that maps to that part of the screen. Of course the proportions change according to text mode geometries and font sizes chosen.
Address the problem then, by updating the text mode geometry defaults rather than checking against them whenever the VT is resized via a user ioctl.
Signed-off-by: Maciej W. Rozycki macro@orcam.me.uk Fixes: e400b6ec4ede ("vt/vgacon: Check if screen resize request comes from userspace") Cc: stable@vger.kernel.org # v2.6.24+ Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/video/console/vgacon.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index e079b910feb24..9463a7cf82222 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -1323,12 +1323,20 @@ static int vgacon_resize(struct vc_data *c, unsigned int width, if ((width << 1) * height > vga_vram_size) return -EINVAL;
+ if (user) { + /* + * Ho ho! Someone (svgatextmode, eh?) may have reprogrammed + * the video mode! Set the new defaults then and go away. + */ + screen_info.orig_video_cols = width; + screen_info.orig_video_lines = height; + vga_default_font_height = c->vc_font.height; + return 0; + } if (width % 2 || width > screen_info.orig_video_cols || height > (screen_info.orig_video_lines * vga_default_font_height)/ c->vc_font.height) - /* let svgatextmode tinker with video timings and - return success */ - return (user) ? 0 : -EINVAL; + return -EINVAL;
if (con_is_visible(c) && !vga_is_gfx) /* who knows */ vgacon_doresize(c, width, height);
From: "Maciej W. Rozycki" macro@orcam.me.uk
stable inclusion from linux-4.19.192 commit 8c5ec4a731e1e2d9b6906bcde62de57a609a9b86
--------------------------------
commit 860dafa902595fb5f1d23bbcce1215188c3341e6 upstream.
Restore the original intent of the VT_RESIZEX ioctl's `v_clin' parameter which is the number of pixel rows per character (cell) rather than the height of the font used.
For framebuffer devices the two values are always the same, because the former is inferred from the latter one. For VGA used as a true text mode device these two parameters are independent from each other: the number of pixel rows per character is set in the CRT controller, while font height is in fact hardwired to 32 pixel rows and fonts of heights below that value are handled by padding their data with blanks when loaded to hardware for use by the character generator. One can change the setting in the CRT controller and it will update the screen contents accordingly regardless of the font loaded.
The `v_clin' parameter is used by the `vgacon' driver to set the height of the character cell and then the cursor position within. Make the parameter explicit then, by defining a new `vc_cell_height' struct member of `vc_data', set it instead of `vc_font.height' from `v_clin' in the VT_RESIZEX ioctl, and then use it throughout the `vgacon' driver except where actual font data is accessed which as noted above is independent from the CRTC setting.
This way the framebuffer console driver is free to ignore the `v_clin' parameter as irrelevant, as it always should have, avoiding any issues attempts to give the parameter a meaning there could have caused, such as one that has led to commit 988d0763361b ("vt_ioctl: make VT_RESIZEX behave like VT_RESIZE"):
"syzbot is reporting UAF/OOB read at bit_putcs()/soft_cursor() [1][2], for vt_resizex() from ioctl(VT_RESIZEX) allows setting font height larger than actual font height calculated by con_font_set() from ioctl(PIO_FONT). Since fbcon_set_font() from con_font_set() allocates minimal amount of memory based on actual font height calculated by con_font_set(), use of vt_resizex() can cause UAF/OOB read for font data."
The problem first appeared around Linux 2.5.66 which predates our repo history, but the origin could be identified with the old MIPS/Linux repo also at: git://git.kernel.org/pub/scm/linux/kernel/git/ralf/linux.git as commit 9736a3546de7 ("Merge with Linux 2.5.66."), where VT_RESIZEX code in `vt_ioctl' was updated as follows:
if (clin) - video_font_height = clin; + vc->vc_font.height = clin;
making the parameter apply to framebuffer devices as well, perhaps due to the use of "font" in the name of the original `video_font_height' variable. Use "cell" in the new struct member then to avoid ambiguity.
References:
[1] https://syzkaller.appspot.com/bug?id=32577e96d88447ded2d3b76d71254fb85524583... [2] https://syzkaller.appspot.com/bug?id=6b8355d27b2b94fb5cedf4655e3a59162d9e48e...
Signed-off-by: Maciej W. Rozycki macro@orcam.me.uk Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org # v2.6.12+ Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/tty/vt/vt_ioctl.c | 6 ++--- drivers/video/console/vgacon.c | 44 +++++++++++++++++----------------- include/linux/console_struct.h | 1 + 3 files changed, 26 insertions(+), 25 deletions(-)
diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 2e959563af534..ce6c7dd7bc126 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -895,17 +895,17 @@ int vt_ioctl(struct tty_struct *tty, if (vcp) { int ret; int save_scan_lines = vcp->vc_scan_lines; - int save_font_height = vcp->vc_font.height; + int save_cell_height = vcp->vc_cell_height;
if (v.v_vlin) vcp->vc_scan_lines = v.v_vlin; if (v.v_clin) - vcp->vc_font.height = v.v_clin; + vcp->vc_cell_height = v.v_clin; vcp->vc_resize_user = 1; ret = vc_resize(vcp, v.v_cols, v.v_rows); if (ret) { vcp->vc_scan_lines = save_scan_lines; - vcp->vc_font.height = save_font_height; + vcp->vc_cell_height = save_cell_height; console_unlock(); return ret; } diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 9463a7cf82222..ab9a602f4126d 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -600,7 +600,7 @@ static void vgacon_init(struct vc_data *c, int init) vc_resize(c, vga_video_num_columns, vga_video_num_lines);
c->vc_scan_lines = vga_scan_lines; - c->vc_font.height = vga_video_font_height; + c->vc_font.height = c->vc_cell_height = vga_video_font_height; c->vc_complement_mask = 0x7700; if (vga_512_chars) c->vc_hi_font_mask = 0x0800; @@ -733,32 +733,32 @@ static void vgacon_cursor(struct vc_data *c, int mode) switch (c->vc_cursor_type & 0x0f) { case CUR_UNDERLINE: vgacon_set_cursor_size(c->vc_x, - c->vc_font.height - - (c->vc_font.height < + c->vc_cell_height - + (c->vc_cell_height < 10 ? 2 : 3), - c->vc_font.height - - (c->vc_font.height < + c->vc_cell_height - + (c->vc_cell_height < 10 ? 1 : 2)); break; case CUR_TWO_THIRDS: vgacon_set_cursor_size(c->vc_x, - c->vc_font.height / 3, - c->vc_font.height - - (c->vc_font.height < + c->vc_cell_height / 3, + c->vc_cell_height - + (c->vc_cell_height < 10 ? 1 : 2)); break; case CUR_LOWER_THIRD: vgacon_set_cursor_size(c->vc_x, - (c->vc_font.height * 2) / 3, - c->vc_font.height - - (c->vc_font.height < + (c->vc_cell_height * 2) / 3, + c->vc_cell_height - + (c->vc_cell_height < 10 ? 1 : 2)); break; case CUR_LOWER_HALF: vgacon_set_cursor_size(c->vc_x, - c->vc_font.height / 2, - c->vc_font.height - - (c->vc_font.height < + c->vc_cell_height / 2, + c->vc_cell_height - + (c->vc_cell_height < 10 ? 1 : 2)); break; case CUR_NONE: @@ -769,7 +769,7 @@ static void vgacon_cursor(struct vc_data *c, int mode) break; default: vgacon_set_cursor_size(c->vc_x, 1, - c->vc_font.height); + c->vc_cell_height); break; } break; @@ -780,13 +780,13 @@ static int vgacon_doresize(struct vc_data *c, unsigned int width, unsigned int height) { unsigned long flags; - unsigned int scanlines = height * c->vc_font.height; + unsigned int scanlines = height * c->vc_cell_height; u8 scanlines_lo = 0, r7 = 0, vsync_end = 0, mode, max_scan;
raw_spin_lock_irqsave(&vga_lock, flags);
vgacon_xres = width * VGA_FONTWIDTH; - vgacon_yres = height * c->vc_font.height; + vgacon_yres = height * c->vc_cell_height; if (vga_video_type >= VIDEO_TYPE_VGAC) { outb_p(VGA_CRTC_MAX_SCAN, vga_video_port_reg); max_scan = inb_p(vga_video_port_val); @@ -841,9 +841,9 @@ static int vgacon_doresize(struct vc_data *c, static int vgacon_switch(struct vc_data *c) { int x = c->vc_cols * VGA_FONTWIDTH; - int y = c->vc_rows * c->vc_font.height; + int y = c->vc_rows * c->vc_cell_height; int rows = screen_info.orig_video_lines * vga_default_font_height/ - c->vc_font.height; + c->vc_cell_height; /* * We need to save screen size here as it's the only way * we can spot the screen has been resized and we need to @@ -1275,7 +1275,7 @@ static int vgacon_adjust_height(struct vc_data *vc, unsigned fontheight) cursor_size_lastto = 0; c->vc_sw->con_cursor(c, CM_DRAW); } - c->vc_font.height = fontheight; + c->vc_font.height = c->vc_cell_height = fontheight; vc_resize(c, 0, rows); /* Adjust console size */ } } @@ -1330,12 +1330,12 @@ static int vgacon_resize(struct vc_data *c, unsigned int width, */ screen_info.orig_video_cols = width; screen_info.orig_video_lines = height; - vga_default_font_height = c->vc_font.height; + vga_default_font_height = c->vc_cell_height; return 0; } if (width % 2 || width > screen_info.orig_video_cols || height > (screen_info.orig_video_lines * vga_default_font_height)/ - c->vc_font.height) + c->vc_cell_height) return -EINVAL;
if (con_is_visible(c) && !vga_is_gfx) /* who knows */ diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index fea64f2692a05..8b5bc3a47bf5f 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -62,6 +62,7 @@ struct vc_data { unsigned int vc_rows; unsigned int vc_size_row; /* Bytes per row */ unsigned int vc_scan_lines; /* # of scan lines */ + unsigned int vc_cell_height; /* CRTC character cell height */ unsigned long vc_origin; /* [!] Start of real screen */ unsigned long vc_scr_end; /* [!] End of real screen */ unsigned long vc_visible_origin; /* [!] Top of visible window */
From: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp
stable inclusion from linux-4.19.192 commit 17d6c58c5fc522561daa4d3fb270edba933ac0a6
--------------------------------
commit ffb324e6f874121f7dce5bdae5e05d02baae7269 upstream.
syzbot is reporting OOB write at vga16fb_imageblit() [1], for resize_screen() from ioctl(VT_RESIZE) returns 0 without checking whether requested rows/columns fit the amount of memory reserved for the graphical screen if current mode is KD_GRAPHICS.
---------- #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <sys/ioctl.h> #include <linux/kd.h> #include <linux/vt.h>
int main(int argc, char *argv[]) { const int fd = open("/dev/char/4:1", O_RDWR); struct vt_sizes vt = { 0x4100, 2 };
ioctl(fd, KDSETMODE, KD_GRAPHICS); ioctl(fd, VT_RESIZE, &vt); ioctl(fd, KDSETMODE, KD_TEXT); return 0; } ----------
Allow framebuffer drivers to return -EINVAL, by moving vc->vc_mode != KD_GRAPHICS check from resize_screen() to fbcon_resize().
Link: https://syzkaller.appspot.com/bug?extid=1f29e126cf461c4de3b3 [1] Reported-by: syzbot syzbot+1f29e126cf461c4de3b3@syzkaller.appspotmail.com Suggested-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp Tested-by: syzbot syzbot+1f29e126cf461c4de3b3@syzkaller.appspotmail.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/tty/vt/vt.c | 2 +- drivers/video/fbdev/core/fbcon.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 1fad4978a3b41..b2b5f19fb2fb9 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1169,7 +1169,7 @@ static inline int resize_screen(struct vc_data *vc, int width, int height, /* Resizes the resolution of the display adapater */ int err = 0;
- if (vc->vc_mode != KD_GRAPHICS && vc->vc_sw->con_resize) + if (vc->vc_sw->con_resize) err = vc->vc_sw->con_resize(vc, width, height, user);
return err; diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 3a75d2d3d16ac..44bbad2e945aa 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -1998,7 +1998,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width, return -EINVAL;
DPRINTK("resize now %ix%i\n", var.xres, var.yres); - if (con_is_visible(vc)) { + if (con_is_visible(vc) && vc->vc_mode == KD_TEXT) { var.activate = FB_ACTIVATE_NOW | FB_ACTIVATE_FORCE; fb_set_var(info, &var);
From: Aurelien Aptel aaptel@suse.com
stable inclusion from linux-4.19.193 commit d094067852cd1eefbcdc3c110c69265b6ce8c981
--------------------------------
commit 6d2fcfe6b517fe7cbf2687adfb0a16cdcd5d9243 upstream.
SMB3.0 doesn't have encryption negotiate context but simply uses the SMB2_GLOBAL_CAP_ENCRYPTION flag.
When that flag is present in the neg response cifs.ko uses AES-128-CCM which is the only cipher available in this context.
cipher_type was set to the server cipher only when parsing encryption negotiate context (SMB3.1.1).
For SMB3.0 it was set to 0. This means cipher_type value can be 0 or 1 for AES-128-CCM.
Fix this by checking for SMB3.0 and encryption capability and setting cipher_type appropriately.
Signed-off-by: Aurelien Aptel aaptel@suse.com Cc: stable@vger.kernel.org Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/cifs/smb2pdu.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 97995b2d63ea0..7fa8699e0ad94 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -782,6 +782,13 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) /* Internal types */ server->capabilities |= SMB2_NT_FIND | SMB2_LARGE_FILES;
+ /* + * SMB3.0 supports only 1 cipher and doesn't have a encryption neg context + * Set the cipher type manually. + */ + if (server->dialect == SMB30_PROT_ID && (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) + server->cipher_type = SMB2_ENCRYPTION_AES128_CCM; + security_blob = smb2_get_data_area_len(&blob_offset, &blob_length, (struct smb2_sync_hdr *)rsp); /*
From: Anna Schumaker Anna.Schumaker@Netapp.com
stable inclusion from linux-4.19.193 commit 39785761feadf261bc5101372b0b0bbaf6a94494
--------------------------------
commit a421d218603ffa822a0b8045055c03eae394a7eb upstream.
Commit de144ff4234f changes _pnfs_return_layout() to call pnfs_mark_matching_lsegs_return() passing NULL as the struct pnfs_layout_range argument. Unfortunately, pnfs_mark_matching_lsegs_return() doesn't check if we have a value here before dereferencing it, causing an oops.
I'm able to hit this crash consistently when running connectathon basic tests on NFS v4.1/v4.2 against Ontap.
Fixes: de144ff4234f ("NFSv4: Don't discard segments marked for return in _pnfs_return_layout()") Cc: stable@vger.kernel.org Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/pnfs.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index af255109c5bf9..c900cb2119bae 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1268,6 +1268,11 @@ _pnfs_return_layout(struct inode *ino) { struct pnfs_layout_hdr *lo = NULL; struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_range range = { + .iomode = IOMODE_ANY, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; LIST_HEAD(tmp_list); nfs4_stateid stateid; int status = 0; @@ -1294,16 +1299,10 @@ _pnfs_return_layout(struct inode *ino) } valid_layout = pnfs_layout_is_valid(lo); pnfs_clear_layoutcommit(ino, &tmp_list); - pnfs_mark_matching_lsegs_return(lo, &tmp_list, NULL, 0); + pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
- if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { - struct pnfs_layout_range range = { - .iomode = IOMODE_ANY, - .offset = 0, - .length = NFS4_MAX_UINT64, - }; + if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range); - }
/* Don't send a LAYOUTRETURN if list was initially empty */ if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) ||
From: Rolf Eike Beer eb@emlix.com
stable inclusion from linux-4.19.193 commit 2ec5e9bb6b0560c90d315559c28a99723c80b996
--------------------------------
commit 0ee74d5a48635c848c20f152d0d488bf84641304 upstream.
iommu_device_sysfs_add() is called before, so is has to be cleaned on subsequent errors.
Fixes: 39ab9555c2411 ("iommu: Add sysfs bindings for struct iommu_device") Cc: stable@vger.kernel.org # 4.11.x Signed-off-by: Rolf Eike Beer eb@emlix.com Acked-by: Lu Baolu baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/17411490.HIIP88n32C@mobilepool36.emlix.com Link: https://lore.kernel.org/r/20210525070802.361755-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/iommu/dmar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index da4da2b4baa98..187812e35c99b 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1105,13 +1105,15 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
err = iommu_device_register(&iommu->iommu); if (err) - goto err_unmap; + goto err_sysfs; }
drhd->iommu = iommu;
return 0;
+err_sysfs: + iommu_device_sysfs_remove(&iommu->iommu); err_unmap: unmap_iommu(iommu); error_free_seq_id:
From: Kees Cook keescook@chromium.org
stable inclusion from linux-4.19.193 commit 582a9b9813ecc89a3b5944ea412f383d02904c50
--------------------------------
commit bfb819ea20ce8bbeeba17e1a6418bf8bda91fc28 upstream.
Fix another "confused deputy" weakness[1]. Writes to /proc/$pid/attr/ files need to check the opener credentials, since these fds do not transition state across execve(). Without this, it is possible to trick another process (which may have different credentials) to write to its own /proc/$pid/attr/ files, leading to unexpected and possibly exploitable behaviors.
[1] https://www.kernel.org/doc/html/latest/security/credentials.html?highlight=c...
Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/proc/base.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/fs/proc/base.c b/fs/proc/base.c index 32c7f7d692678..e43c1c9480cba 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2564,6 +2564,10 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, void *page; int rv;
+ /* A task may only write when it was the opener. */ + if (file->f_cred != current_real_cred()) + return -EPERM; + rcu_read_lock(); task = pid_task(proc_pid(inode), PIDTYPE_PID); if (!task) {
From: Mikulas Patocka mpatocka@redhat.com
stable inclusion from linux-4.19.193 commit 3fe7be3c1d77af7038ebb3d4972b00ebea5f8183
--------------------------------
commit 7e768532b2396bcb7fbf6f82384b85c0f1d2f197 upstream.
If an origin target has no snapshots, o->split_boundary is set to 0. This causes BUG_ON(sectors <= 0) in block/bio.c:bio_split().
Fix this by initializing chunk_size, and in turn split_boundary, to rounddown_pow_of_two(UINT_MAX) -- the largest power of two that fits into "unsigned" type.
Signed-off-by: Mikulas Patocka mpatocka@redhat.com Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/dm-snap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 7d96836294713..52101e5c72588 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -794,7 +794,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) static uint32_t __minimum_chunk_size(struct origin *o) { struct dm_snapshot *snap; - unsigned chunk_size = 0; + unsigned chunk_size = rounddown_pow_of_two(UINT_MAX);
if (o) list_for_each_entry(snap, &o->snapshots, list)
From: Dan Carpenter dan.carpenter@oracle.com
stable inclusion from linux-4.19.193 commit 945ebef997227ca8c20bad7f8a8358c8ee57a84a
--------------------------------
commit 769b01ea68b6c49dc3cde6adf7e53927dacbd3a8 upstream.
The "sizeof(struct nfs_fh)" is two bytes too large and could lead to memory corruption. It should be NFS_MAXFHSIZE because that's the size of the ->data[] buffer.
I reversed the size of the arguments to put the variable on the left.
Fixes: 16b374ca439f ("NFSv4.1: pnfs: filelayout: add driver's LAYOUTGET and GETDEVICEINFO infrastructure") Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/filelayout/filelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 2478a69da0f0c..e8e825497cbdc 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -717,7 +717,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, if (unlikely(!p)) goto out_err; fl->fh_array[i]->size = be32_to_cpup(p++); - if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { + if (fl->fh_array[i]->size > NFS_MAXFHSIZE) { printk(KERN_ERR "NFS: Too big fh %d received %d\n", i, fl->fh_array[i]->size); goto out_err;
From: Trond Myklebust trond.myklebust@hammerspace.com
stable inclusion from linux-4.19.193 commit 40f139a6d50c232c0d1fd1c5e65a845c62db0ede
--------------------------------
commit 0d0ea309357dea0d85a82815f02157eb7fcda39f upstream.
The value of mirror->pg_bytes_written should only be updated after a successful attempt to flush out the requests on the list.
Fixes: a7d42ddb3099 ("nfs: add mirroring support to pgio layer") Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/pagelist.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2c7d76b4c5e18..a9e1bcdd9394d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -987,17 +987,16 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
- if (!list_empty(&mirror->pg_list)) { int error = desc->pg_ops->pg_doio(desc); if (error < 0) desc->pg_error = error; - else + if (list_empty(&mirror->pg_list)) { mirror->pg_bytes_written += mirror->pg_count; - } - if (list_empty(&mirror->pg_list)) { - mirror->pg_count = 0; - mirror->pg_base = 0; + mirror->pg_count = 0; + mirror->pg_base = 0; + mirror->pg_recoalesce = 0; + } } }
@@ -1095,7 +1094,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
do { list_splice_init(&mirror->pg_list, &head); - mirror->pg_bytes_written -= mirror->pg_count; mirror->pg_count = 0; mirror->pg_base = 0; mirror->pg_recoalesce = 0;
From: Zhang Xiaoxu zhangxiaoxu5@huawei.com
stable inclusion from linux-4.19.193 commit 1cfca6c32c2a6c41370010fcc4067e2d8dcfa02b
--------------------------------
commit e67afa7ee4a59584d7253e45d7f63b9528819a13 upstream.
Since commit bdcc2cd14e4e ("NFSv4.2: handle NFS-specific llseek errors"), nfs42_proc_llseek would return -EOPNOTSUPP rather than -ENOTSUPP when SEEK_DATA on NFSv4.0/v4.1.
This will lead xfstests generic/285 not run on NFSv4.0/v4.1 when set the CONFIG_NFS_V4_2, rather than run failed.
Fixes: bdcc2cd14e4e ("NFSv4.2: handle NFS-specific llseek errors") Cc: <stable.vger.kernel.org> # 4.2 Signed-off-by: Zhang Xiaoxu zhangxiaoxu5@huawei.com Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/nfs4file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 75d3cf86f1723..e053a883d08d4 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -148,7 +148,7 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) case SEEK_HOLE: case SEEK_DATA: ret = nfs42_proc_llseek(filep, offset, whence); - if (ret != -ENOTSUPP) + if (ret != -EOPNOTSUPP) return ret; /* Fall through */ default:
From: Steve French stfrench@microsoft.com
stable inclusion from linux-4.19.193 commit 66988dc4dc62adf9d86e8f1e844fc747bbee28b7
--------------------------------
[ Upstream commit c0d46717b95735b0eacfddbcca9df37a49de9c7a ]
See MS-SMB2 3.2.4.1.4, file ids in compounded requests should be set to 0xFFFFFFFFFFFFFFFF (we were treating it as u32 not u64 and setting it incorrectly).
Signed-off-by: Steve French stfrench@microsoft.com Reported-by: Stefan Metzmacher metze@samba.org Reviewed-by: Shyam Prasad N sprasad@microsoft.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/cifs/smb2pdu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 7fa8699e0ad94..544e31149f078 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3129,10 +3129,10 @@ smb2_new_read_req(void **buf, unsigned int *total_len, * Related requests use info from previous read request * in chain. */ - shdr->SessionId = 0xFFFFFFFF; + shdr->SessionId = 0xFFFFFFFFFFFFFFFF; shdr->TreeId = 0xFFFFFFFF; - req->PersistentFileId = 0xFFFFFFFF; - req->VolatileFileId = 0xFFFFFFFF; + req->PersistentFileId = 0xFFFFFFFFFFFFFFFF; + req->VolatileFileId = 0xFFFFFFFFFFFFFFFF; } } if (remaining_bytes > io_parms->length)
From: Dan Carpenter dan.carpenter@oracle.com
stable inclusion from linux-4.19.193 commit 94e2701600ecc5505d4727d580c83b66ecc80ec7
--------------------------------
[ Upstream commit 8c7e7b8486cda21269d393245883c5e4737d5ee7 ]
If sas_notify_lldd_dev_found() fails then this code calls:
sas_unregister_dev(port, dev);
which removes "dev", our list iterator, from the list. This could lead to an endless loop. We need to use list_for_each_entry_safe().
Link: https://lore.kernel.org/r/YKUeq6gwfGcvvhty@mwanda Fixes: 303694eeee5e ("[SCSI] libsas: suspend / resume support") Reviewed-by: John Garry john.garry@huawei.com Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/scsi/libsas/sas_port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c index ddf004bf667e6..32991f7ec2a10 100644 --- a/drivers/scsi/libsas/sas_port.c +++ b/drivers/scsi/libsas/sas_port.c @@ -41,7 +41,7 @@ static bool phy_is_wideport_member(struct asd_sas_port *port, struct asd_sas_phy
static void sas_resume_port(struct asd_sas_phy *phy) { - struct domain_device *dev; + struct domain_device *dev, *n; struct asd_sas_port *port = phy->port; struct sas_ha_struct *sas_ha = phy->ha; struct sas_internal *si = to_sas_internal(sas_ha->core.shost->transportt); @@ -60,7 +60,7 @@ static void sas_resume_port(struct asd_sas_phy *phy) * 1/ presume every device came back * 2/ force the next revalidation to check all expander phys */ - list_for_each_entry(dev, &port->dev_list, dev_list_node) { + list_for_each_entry_safe(dev, n, &port->dev_list, dev_list_node) { int i, rc;
rc = sas_notify_lldd_dev_found(dev);
From: Francesco Ruggeri fruggeri@arista.com
stable inclusion from linux-4.19.193 commit ea9ef822d541859b65696b62647ccc4ab43c1129
--------------------------------
[ Upstream commit e29f011e8fc04b2cdc742a2b9bbfa1b62518381a ]
Commit dbd1759e6a9c ("ipv6: on reassembly, record frag_max_size") filled the frag_max_size field in IP6CB in the input path. The field should also be filled in case of atomic fragments.
Fixes: dbd1759e6a9c ('ipv6: on reassembly, record frag_max_size') Signed-off-by: Francesco Ruggeri fruggeri@arista.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv6/reassembly.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index c6132e39ab16b..60dfd0d118512 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -347,7 +347,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb);
- if (!(fhdr->frag_off & htons(0xFFF9))) { + if (!(fhdr->frag_off & htons(IP6_OFFSET | IP6_MF))) { /* It is not a fragmented frame */ skb->transport_header += sizeof(struct frag_hdr); __IP6_INC_STATS(net, @@ -355,6 +355,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; + IP6CB(skb)->frag_max_size = ntohs(hdr->payload_len) + + sizeof(struct ipv6hdr); return 1; }
From: Mike Kravetz mike.kravetz@oracle.com
stable inclusion from linux-4.19.193 commit a92212ef6326c8dc09003c7af4e1ba7da0b77e44
--------------------------------
commit 552546366a30d88bd1d6f5efe848b2ab50fd57e5 upstream.
A new clang diagnostic (-Wsizeof-array-div) warns about the calculation to determine the number of u32's in an array of unsigned longs. Suppress warning by adding parentheses.
While looking at the above issue, noticed that the 'address' parameter to hugetlb_fault_mutex_hash is no longer used. So, remove it from the definition and all callers.
No functional change.
Link: http://lkml.kernel.org/r/20190919011847.18400-1-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz mike.kravetz@oracle.com Reported-by: Nathan Chancellor natechancellor@gmail.com Reviewed-by: Nathan Chancellor natechancellor@gmail.com Reviewed-by: Davidlohr Bueso dbueso@suse.de Reviewed-by: Andrew Morton akpm@linux-foundation.org Cc: Nick Desaulniers ndesaulniers@google.com Cc: Ilie Halip ilie.halip@gmail.com Cc: David Bolvansky david.bolvansky@gmail.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/hugetlbfs/inode.c | 4 ++-- include/linux/hugetlb.h | 2 +- mm/hugetlb.c | 10 +++++----- mm/userfaultfd.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 47a7b96cfe10f..005e05c442c58 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -491,7 +491,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, u32 hash;
index = page->index; - hash = hugetlb_fault_mutex_hash(h, mapping, index, 0); + hash = hugetlb_fault_mutex_hash(h, mapping, index); mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* @@ -756,7 +756,7 @@ static int hugetlbfs_fallocate_chunk(pgoff_t start, pgoff_t end, addr = index * hpage_size;
/* mutex taken here, fault path and hole punch */ - hash = hugetlb_fault_mutex_hash(h, mapping, index, addr); + hash = hugetlb_fault_mutex_hash(h, mapping, index); mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8aec9d4220bda..831e8b654ab81 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -124,7 +124,7 @@ void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, - pgoff_t idx, unsigned long address); + pgoff_t idx);
pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 726a3aa9d19d1..f542a32e82f55 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4003,7 +4003,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, * handling userfault. Reacquire after handling * fault to make calling code simpler. */ - hash = hugetlb_fault_mutex_hash(h, mapping, idx, haddr); + hash = hugetlb_fault_mutex_hash(h, mapping, idx); mutex_unlock(&hugetlb_fault_mutex_table[hash]); ret = handle_userfault(&vmf, VM_UFFD_MISSING); mutex_lock(&hugetlb_fault_mutex_table[hash]); @@ -4131,7 +4131,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
#ifdef CONFIG_SMP u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, - pgoff_t idx, unsigned long address) + pgoff_t idx) { unsigned long key[2]; u32 hash; @@ -4139,7 +4139,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, key[0] = (unsigned long) mapping; key[1] = idx;
- hash = jhash2((u32 *)&key, sizeof(key)/sizeof(u32), 0); + hash = jhash2((u32 *)&key, sizeof(key)/(sizeof(u32)), 0);
return hash & (num_fault_mutexes - 1); } @@ -4149,7 +4149,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, * return 0 and avoid the hashing overhead. */ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, - pgoff_t idx, unsigned long address) + pgoff_t idx) { return 0; } @@ -4193,7 +4193,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, * get spurious allocation failures if two CPUs race to instantiate * the same page in the page cache. */ - hash = hugetlb_fault_mutex_hash(h, mapping, idx, haddr); + hash = hugetlb_fault_mutex_hash(h, mapping, idx); mutex_lock(&hugetlb_fault_mutex_table[hash]);
entry = huge_ptep_get(ptep); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index cc6ea42d1ea81..1c86abd41c6d7 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -297,7 +297,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, */ idx = linear_page_index(dst_vma, dst_addr); mapping = dst_vma->vm_file->f_mapping; - hash = hugetlb_fault_mutex_hash(h, mapping, idx, dst_addr); + hash = hugetlb_fault_mutex_hash(h, mapping, idx); mutex_lock(&hugetlb_fault_mutex_table[hash]);
err = -ENOMEM;