From: Pavel Begunkov <asml.silence@gmail.com>
mainline inclusion
from mainline-v5.6-rc1
commit 28ca0d6d39ab1d01c86762c82a585b7cedd2920c
category: bugfix
bugzilla: 35619
CVE: NA
--------------------------------
Like the other *_continue() helpers, this continues iteration from a given position.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Reviewed-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/linux/list.h | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff --git a/include/linux/list.h b/include/linux/list.h
index de04cc5ed536..0e540581d52c 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -455,6 +455,16 @@ static inline void list_splice_tail_init(struct list_head *list,
 #define list_for_each(pos, head) \
 	for (pos = (head)->next; pos != (head); pos = pos->next)
 
+/**
+ * list_for_each_continue - continue iteration over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ *
+ * Continue to iterate over a list, continuing after the current position.
+ */
+#define list_for_each_continue(pos, head) \
+	for (pos = pos->next; pos != (head); pos = pos->next)
+
 /**
  * list_for_each_prev - iterate over a list backwards
  * @pos:	the &struct list_head to use as a loop cursor.
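To illustrate the semantics, here is a minimal user-space sketch (the struct item type and the re-declared macros are stand-ins for this example, not part of the patch): locate a position with list_for_each(), then resume the walk after it with list_for_each_continue().

#include <stdio.h>

/* Stand-ins for the kernel primitives; enough to show the semantics. */
struct list_head { struct list_head *next, *prev; };

#define list_for_each(pos, head) \
	for (pos = (head)->next; pos != (head); pos = pos->next)

#define list_for_each_continue(pos, head) \
	for (pos = pos->next; pos != (head); pos = pos->next)

struct item { struct list_head node; int val; };	/* node must be first */

int main(void)
{
	struct item a = { .val = 1 }, b = { .val = 2 }, c = { .val = 3 };
	struct list_head head = { &a.node, &c.node };
	struct list_head *pos;

	/* head -> a -> b -> c -> head (circular, doubly linked) */
	a.node.next = &b.node; a.node.prev = &head;
	b.node.next = &c.node; b.node.prev = &a.node;
	c.node.next = &head;   c.node.prev = &b.node;

	/* Walk until we find val == 1, then resume after that position. */
	list_for_each(pos, &head)
		if (((struct item *)pos)->val == 1)
			break;

	list_for_each_continue(pos, &head)
		printf("%d\n", ((struct item *)pos)->val);	/* prints 2, 3 */

	return 0;
}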
From: Miklos Szeredi <mszeredi@redhat.com>
mainline inclusion
from mainline-v5.8-rc1
commit 9f6c61f96f2d97cbb5f7fa85607bc398f843ff0f
category: bugfix
bugzilla: 35619
CVE: NA
--------------------------------
If mounts are deleted after a read(2) call on /proc/self/mounts (or its kin), the subsequent read(2) could miss a mount that comes after the deleted one in the list. This is because the file position is interpreted as the number of mount entries from the start of the list.
E.g. first read gets entries #0 to #9; the seq file index will be 10. Then entry #5 is deleted, resulting in #10 becoming #9 and #11 becoming #10, etc... The next read will continue from entry #10, and #9 is missed.
Solve this by adding a cursor entry for each open instance. Taking the global namespace_sem for write seems excessive, since we are only dealing with a per-namespace list. Instead add a per-namespace spinlock and use that together with namespace_sem taken for read to protect against concurrent modification of the mount list. This may reduce parallelism of is_local_mountpoint(), but it's hardly a big contention point. We could also use RCU freeing of cursors to make traversal not need additional locks, if that turns out to be necessary.
Only move the cursor once for each read (cursor is not added on open) to minimize cacheline invalidation. When EOF is reached, the cursor is taken off the list, in order to prevent an excessive number of cursors due to inactive open file descriptors.
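For illustration, a stripped-down user-space model of the cursor scheme (all names here are hypothetical stand-ins for the code in the diff below):

#include <stdbool.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

struct entry {
	struct list_head node;	/* must be first so the cast below is valid */
	bool is_cursor;		/* stands in for the MNT_CURSOR flag */
	int id;
};

/* Return the next real (non-cursor) entry after position p, or NULL at EOF. */
static struct entry *next_real(struct list_head *p, struct list_head *head)
{
	for (p = p->next; p != head; p = p->next) {
		struct entry *e = (struct entry *)p;
		if (!e->is_cursor)
			return e;
	}
	return NULL;
}

int main(void)
{
	struct entry a = { .id = 1 }, cur = { .is_cursor = true }, b = { .id = 2 };
	struct list_head head;

	/* head -> a -> cur -> b -> head: a reader parked its cursor after a. */
	head.next = &a.node;  a.node.next = &cur.node;
	cur.node.next = &b.node;  b.node.next = &head;

	/* Resuming from the cursor yields b: nothing skipped, no duplicates. */
	struct entry *e = next_real(&cur.node, &head);
	printf("%d\n", e ? e->id : -1);		/* prints 2 */
	return 0;
}

Because the reader's position is itself a node in the list, deleting a real mount entry relinks the cursor's neighbours but never invalidates the cursor, so the next read resumes exactly after the last entry shown instead of at a numeric offset.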
Reported-by: Karel Zak <kzak@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Conflicts:
	fs/mount.h
	fs/namespace.c
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Reviewed-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 fs/mount.h            | 12 ++++--
 fs/namespace.c        | 91 +++++++++++++++++++++++++++++++++++--------
 fs/proc_namespace.c   |  4 +-
 include/linux/mount.h |  4 +-
 4 files changed, 90 insertions(+), 21 deletions(-)
diff --git a/fs/mount.h b/fs/mount.h
index f39bc9da4d73..b8318db51ea1 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -9,7 +9,13 @@ struct mnt_namespace {
 	atomic_t		count;
 	struct ns_common	ns;
 	struct mount *	root;
+	/*
+	 * Traversal and modification of .list is protected by either
+	 * - taking namespace_sem for write, OR
+	 * - taking namespace_sem for read AND taking .ns_lock.
+	 */
 	struct list_head	list;
+	spinlock_t		ns_lock;
 	struct user_namespace	*user_ns;
 	struct ucounts		*ucounts;
 	u64			seq;	/* Sequence number to prevent loops */
@@ -131,9 +137,7 @@ struct proc_mounts {
 	struct mnt_namespace *ns;
 	struct path root;
 	int (*show)(struct seq_file *, struct vfsmount *);
-	void *cached_mount;
-	u64 cached_event;
-	loff_t cached_index;
+	struct mount cursor;
 };
 
 extern const struct seq_operations mounts_op;
@@ -146,3 +150,5 @@ static inline bool is_local_mountpoint(struct dentry *dentry)
 	return __is_local_mountpoint(dentry);
 }
+
+extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
diff --git a/fs/namespace.c b/fs/namespace.c
index da90b9c878c1..c582abcdab59 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -653,6 +653,21 @@ struct vfsmount *lookup_mnt(const struct path *path)
 	return m;
 }
 
+static inline void lock_ns_list(struct mnt_namespace *ns)
+{
+	spin_lock(&ns->ns_lock);
+}
+
+static inline void unlock_ns_list(struct mnt_namespace *ns)
+{
+	spin_unlock(&ns->ns_lock);
+}
+
+static inline bool mnt_is_cursor(struct mount *mnt)
+{
+	return mnt->mnt.mnt_flags & MNT_CURSOR;
+}
+
 /*
  * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
  * current mount namespace.
@@ -678,11 +693,15 @@ bool __is_local_mountpoint(struct dentry *dentry)
 		goto out;
 
 	down_read(&namespace_sem);
+	lock_ns_list(ns);
 	list_for_each_entry(mnt, &ns->list, mnt_list) {
+		if (mnt_is_cursor(mnt))
+			continue;
 		is_covered = (mnt->mnt_mountpoint == dentry);
 		if (is_covered)
 			break;
 	}
+	unlock_ns_list(ns);
 	up_read(&namespace_sem);
 out:
 	return is_covered;
@@ -1237,46 +1256,71 @@ struct vfsmount *mnt_clone_internal(const struct path *path)
 }
 
 #ifdef CONFIG_PROC_FS
+static struct mount *mnt_list_next(struct mnt_namespace *ns,
+				   struct list_head *p)
+{
+	struct mount *mnt, *ret = NULL;
+
+	lock_ns_list(ns);
+	list_for_each_continue(p, &ns->list) {
+		mnt = list_entry(p, typeof(*mnt), mnt_list);
+		if (!mnt_is_cursor(mnt)) {
+			ret = mnt;
+			break;
+		}
+	}
+	unlock_ns_list(ns);
+
+	return ret;
+}
+
 /* iterator; we want it to have access to namespace_sem, thus here... */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
 	struct proc_mounts *p = m->private;
+	struct list_head *prev;
 
 	down_read(&namespace_sem);
-	if (p->cached_event == p->ns->event) {
-		void *v = p->cached_mount;
-		if (*pos == p->cached_index)
-			return v;
-		if (*pos == p->cached_index + 1) {
-			v = seq_list_next(v, &p->ns->list, &p->cached_index);
-			return p->cached_mount = v;
-		}
+	if (!*pos) {
+		prev = &p->ns->list;
+	} else {
+		prev = &p->cursor.mnt_list;
+
+		/* Read after we'd reached the end? */
+		if (list_empty(prev))
+			return NULL;
 	}
 
-	p->cached_event = p->ns->event;
-	p->cached_mount = seq_list_start(&p->ns->list, *pos);
-	p->cached_index = *pos;
-	return p->cached_mount;
+	return mnt_list_next(p->ns, prev);
 }
 
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct proc_mounts *p = m->private;
+	struct mount *mnt = v;
 
-	p->cached_mount = seq_list_next(v, &p->ns->list, pos);
-	p->cached_index = *pos;
-	return p->cached_mount;
+	++*pos;
+	return mnt_list_next(p->ns, &mnt->mnt_list);
 }
 
 static void m_stop(struct seq_file *m, void *v)
 {
+	struct proc_mounts *p = m->private;
+	struct mount *mnt = v;
+
+	lock_ns_list(p->ns);
+	if (mnt)
+		list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
+	else
+		list_del_init(&p->cursor.mnt_list);
+	unlock_ns_list(p->ns);
 	up_read(&namespace_sem);
 }
 
 static int m_show(struct seq_file *m, void *v)
 {
 	struct proc_mounts *p = m->private;
-	struct mount *r = list_entry(v, struct mount, mnt_list);
+	struct mount *r = v;
 	return p->show(m, &r->mnt);
 }
 
@@ -1286,6 +1330,15 @@ const struct seq_operations mounts_op = {
 	.stop	= m_stop,
 	.show	= m_show,
 };
+
+void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
+{
+	down_read(&namespace_sem);
+	lock_ns_list(ns);
+	list_del(&cursor->mnt_list);
+	unlock_ns_list(ns);
+	up_read(&namespace_sem);
+}
 #endif  /* CONFIG_PROC_FS */
 
 /**
@@ -2858,6 +2911,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	INIT_LIST_HEAD(&new_ns->list);
 	init_waitqueue_head(&new_ns->poll);
 	new_ns->event = 0;
+	spin_lock_init(&new_ns->ns_lock);
 	new_ns->user_ns = get_user_ns(user_ns);
 	new_ns->ucounts = ucounts;
 	new_ns->mounts = 0;
@@ -3312,10 +3366,14 @@ static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
 	bool visible = false;
 
 	down_read(&namespace_sem);
+	lock_ns_list(ns);
 	list_for_each_entry(mnt, &ns->list, mnt_list) {
 		struct mount *child;
 		int mnt_flags;
 
+		if (mnt_is_cursor(mnt))
+			continue;
+
 		if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
 			continue;
 
@@ -3363,6 +3421,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
 	next:	;
 	}
 found:
+	unlock_ns_list(ns);
 	up_read(&namespace_sem);
 	return visible;
 }
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index e16fb8f2049e..969f9c8fbdc0 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -279,7 +279,8 @@ static int mounts_open_common(struct inode *inode, struct file *file,
 	p->ns = ns;
 	p->root = root;
 	p->show = show;
-	p->cached_event = ~0ULL;
+	INIT_LIST_HEAD(&p->cursor.mnt_list);
+	p->cursor.mnt.mnt_flags = MNT_CURSOR;
 
 	return 0;
 
@@ -296,6 +297,7 @@ static int mounts_release(struct inode *inode, struct file *file)
 	struct seq_file *m = file->private_data;
 	struct proc_mounts *p = m->private;
 	path_put(&p->root);
+	mnt_cursor_del(p->ns, &p->cursor);
 	put_mnt_ns(p->ns);
 	return seq_release_private(inode, file);
 }
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 4b0db4418954..46a77d791870 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -49,7 +49,8 @@ struct mnt_namespace;
 #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
 
 #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
-			    MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
+			    MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \
+			    MNT_CURSOR)
 
 #define MNT_INTERNAL	0x4000
 
@@ -63,6 +64,7 @@ struct mnt_namespace;
 #define MNT_SYNC_UMOUNT		0x2000000
 #define MNT_MARKED		0x4000000
 #define MNT_UMOUNT		0x8000000
+#define MNT_CURSOR		0x10000000
 
 struct vfsmount {
 	struct dentry *mnt_root;	/* root of the mounted tree */
From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
hulk inclusion
category: bugfix
bugzilla: 35619
CVE: NA
---------------------------
Since we added ns_lock to struct mnt_namespace, the KABI was broken; use a wrapper structure to fix it.
We assume all modules only use pointers to struct mnt_namespace and do not depend on the internal layout of struct mnt_namespace.
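For illustration, a user-space sketch of the wrapper pattern (simplified, hypothetical names; the real code uses kmalloc and a spinlock): the frozen struct is embedded in a wrapper, callers receive a pointer to the embedded member, and container_of() recovers the wrapper where the new field is needed.

#include <stddef.h>
#include <stdlib.h>

/* The KABI-frozen structure: its layout must not change. */
struct frozen_ns {
	int refcount;			/* hypothetical existing members */
};

/* New state lives after the frozen struct, invisible to modules. */
struct frozen_ns_wrapper {
	struct frozen_ns ns;		/* kept as the first member */
	int ns_lock;			/* stand-in for the new spinlock */
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct frozen_ns *alloc_ns(void)
{
	struct frozen_ns_wrapper *nsw = calloc(1, sizeof(*nsw));

	/* Callers only ever see the embedded ABI struct. */
	return nsw ? &nsw->ns : NULL;
}

void free_ns(struct frozen_ns *ns)
{
	/* Recover the wrapper before freeing, as free_mnt_ns() does. */
	free(container_of(ns, struct frozen_ns_wrapper, ns));
}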
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Reviewed-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 fs/mount.h     |  9 ++++++--
 fs/namespace.c | 58 +++++++++++++++++++++++++++++++++-----------------
 2 files changed, 46 insertions(+), 21 deletions(-)
diff --git a/fs/mount.h b/fs/mount.h
index b8318db51ea1..ab0150e2f0ad 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -12,10 +12,10 @@ struct mnt_namespace {
 	/*
 	 * Traversal and modification of .list is protected by either
 	 * - taking namespace_sem for write, OR
-	 * - taking namespace_sem for read AND taking .ns_lock.
+	 * - taking namespace_sem for read AND taking .ns_lock
+	 *   in mnt_namespace_wrapper
 	 */
 	struct list_head	list;
-	spinlock_t		ns_lock;
 	struct user_namespace	*user_ns;
 	struct ucounts		*ucounts;
 	u64			seq;	/* Sequence number to prevent loops */
@@ -25,6 +25,11 @@ struct mnt_namespace {
 	unsigned int		pending_mounts;
 } __randomize_layout;
 
+struct mnt_namespace_wrapper {
+	struct mnt_namespace ns;
+	spinlock_t ns_lock;
+};
+
 struct mnt_pcp {
 	int mnt_count;
 	int mnt_writers;
diff --git a/fs/namespace.c b/fs/namespace.c
index c582abcdab59..2c84a110ce2d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -653,14 +653,14 @@ struct vfsmount *lookup_mnt(const struct path *path)
 	return m;
 }
 
-static inline void lock_ns_list(struct mnt_namespace *ns)
+static inline void lock_ns_list(struct mnt_namespace_wrapper *nsw)
 {
-	spin_lock(&ns->ns_lock);
+	spin_lock(&nsw->ns_lock);
 }
 
-static inline void unlock_ns_list(struct mnt_namespace *ns)
+static inline void unlock_ns_list(struct mnt_namespace_wrapper *nsw)
 {
-	spin_unlock(&ns->ns_lock);
+	spin_unlock(&nsw->ns_lock);
 }
 
 static inline bool mnt_is_cursor(struct mount *mnt)
@@ -686,14 +686,16 @@ static inline bool mnt_is_cursor(struct mount *mnt)
 bool __is_local_mountpoint(struct dentry *dentry)
 {
 	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+	struct mnt_namespace_wrapper *nsw;
 	struct mount *mnt;
 	bool is_covered = false;
 
 	if (!d_mountpoint(dentry))
 		goto out;
 
+	nsw = container_of(ns, struct mnt_namespace_wrapper, ns);
 	down_read(&namespace_sem);
-	lock_ns_list(ns);
+	lock_ns_list(nsw);
 	list_for_each_entry(mnt, &ns->list, mnt_list) {
 		if (mnt_is_cursor(mnt))
 			continue;
@@ -701,7 +703,7 @@ bool __is_local_mountpoint(struct dentry *dentry)
 		if (is_covered)
 			break;
 	}
-	unlock_ns_list(ns);
+	unlock_ns_list(nsw);
 	up_read(&namespace_sem);
 out:
 	return is_covered;
@@ -1260,8 +1262,11 @@ static struct mount *mnt_list_next(struct mnt_namespace *ns,
 				   struct list_head *p)
 {
 	struct mount *mnt, *ret = NULL;
+	struct mnt_namespace_wrapper *nsw;
 
-	lock_ns_list(ns);
+	nsw = container_of(ns, struct mnt_namespace_wrapper, ns);
+
+	lock_ns_list(nsw);
 	list_for_each_continue(p, &ns->list) {
 		mnt = list_entry(p, typeof(*mnt), mnt_list);
 		if (!mnt_is_cursor(mnt)) {
@@ -1269,7 +1274,7 @@ static struct mount *mnt_list_next(struct mnt_namespace *ns,
 			break;
 		}
 	}
-	unlock_ns_list(ns);
+	unlock_ns_list(nsw);
 
 	return ret;
 }
@@ -1307,13 +1312,16 @@ static void m_stop(struct seq_file *m, void *v)
 {
 	struct proc_mounts *p = m->private;
 	struct mount *mnt = v;
+	struct mnt_namespace_wrapper *nsw;
+
+	nsw = container_of(p->ns, struct mnt_namespace_wrapper, ns);
 
-	lock_ns_list(p->ns);
+	lock_ns_list(nsw);
 	if (mnt)
 		list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
 	else
 		list_del_init(&p->cursor.mnt_list);
-	unlock_ns_list(p->ns);
+	unlock_ns_list(nsw);
 	up_read(&namespace_sem);
 }
 
@@ -1333,10 +1341,14 @@ const struct seq_operations mounts_op = {
 
 void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
 {
+	struct mnt_namespace_wrapper *nsw;
+
+	nsw = container_of(ns, struct mnt_namespace_wrapper, ns);
+
 	down_read(&namespace_sem);
-	lock_ns_list(ns);
+	lock_ns_list(nsw);
 	list_del(&cursor->mnt_list);
-	unlock_ns_list(ns);
+	unlock_ns_list(nsw);
 	up_read(&namespace_sem);
 }
 #endif  /* CONFIG_PROC_FS */
@@ -2868,10 +2880,13 @@ static void dec_mnt_namespaces(struct ucounts *ucounts)
 
 static void free_mnt_ns(struct mnt_namespace *ns)
 {
+	struct mnt_namespace_wrapper *nsw;
+
+	nsw = container_of(ns, struct mnt_namespace_wrapper, ns);
 	ns_free_inum(&ns->ns);
 	dec_mnt_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
-	kfree(ns);
+	kfree(nsw);
}
 
 /*
@@ -2886,6 +2901,7 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
 static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 {
 	struct mnt_namespace *new_ns;
+	struct mnt_namespace_wrapper *new_nsw;
 	struct ucounts *ucounts;
 	int ret;
@@ -2893,14 +2909,15 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	if (!ucounts)
 		return ERR_PTR(-ENOSPC);
 
-	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
-	if (!new_ns) {
+	new_nsw = kmalloc(sizeof(struct mnt_namespace_wrapper), GFP_KERNEL);
+	if (!new_nsw) {
 		dec_mnt_namespaces(ucounts);
 		return ERR_PTR(-ENOMEM);
 	}
+	new_ns = &new_nsw->ns;
 	ret = ns_alloc_inum(&new_ns->ns);
 	if (ret) {
-		kfree(new_ns);
+		kfree(new_nsw);
 		dec_mnt_namespaces(ucounts);
 		return ERR_PTR(ret);
 	}
@@ -2911,7 +2928,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	INIT_LIST_HEAD(&new_ns->list);
 	init_waitqueue_head(&new_ns->poll);
 	new_ns->event = 0;
-	spin_lock_init(&new_ns->ns_lock);
+	spin_lock_init(&new_nsw->ns_lock);
 	new_ns->user_ns = get_user_ns(user_ns);
 	new_ns->ucounts = ucounts;
 	new_ns->mounts = 0;
@@ -3364,9 +3381,12 @@ static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
 	int new_flags = *new_mnt_flags;
 	struct mount *mnt;
 	bool visible = false;
+	struct mnt_namespace_wrapper *nsw;
+
+	nsw = container_of(ns, struct mnt_namespace_wrapper, ns);
 
 	down_read(&namespace_sem);
-	lock_ns_list(ns);
+	lock_ns_list(nsw);
 	list_for_each_entry(mnt, &ns->list, mnt_list) {
 		struct mount *child;
 		int mnt_flags;
@@ -3421,7 +3441,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
 	next:	;
 	}
 found:
-	unlock_ns_list(ns);
+	unlock_ns_list(nsw);
 	up_read(&namespace_sem);
 	return visible;
 }
From: Yang Yingliang <yangyingliang@huawei.com>
hulk inclusion
category: bugfix
bugzilla: 47452
CVE: NA
-------------------------------------------------
Fix the following build errors (seen when CONFIG_SLUB is not set):

mm/vmstat.c:1739: undefined reference to `isolate_cnt'
mm/vmstat.c:1740: undefined reference to `unexpect_free_cnt'
kernel/sysctl.o:(.data+0x1550): undefined reference to `sysctl_isolate_corrupted_freelist'
Fixes: add85f87f2a8c ("sysctl: control if check validity of...")
Fixes: cd12ff78aee9e ("connector: debug: try catch unexpect free skb->data")
Fixes: d5d59f865c97a ("mm: slub: check freelist validity to avoid crash")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 kernel/sysctl.c | 4 ++++
 mm/vmstat.c     | 4 ++++
 2 files changed, 8 insertions(+)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c921ee10615a..f8a376720e87 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1245,7 +1245,9 @@ static struct ctl_table kern_table[] = {
 	{ }
 };
 
+#ifdef CONFIG_SLUB
 extern int sysctl_isolate_corrupted_freelist;
+#endif
 static struct ctl_table vm_table[] = {
 	{
 		.procname	= "overcommit_memory",
@@ -1715,6 +1717,7 @@ static struct ctl_table vm_table[] = {
 		.extra2		= (void *)&mmap_rnd_compat_bits_max,
 	},
 #endif
+#ifdef CONFIG_SLUB
 	{
 		.procname	= "isolate_corrupted_freelist",
 		.data		= &sysctl_isolate_corrupted_freelist,
@@ -1724,6 +1727,7 @@ static struct ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#endif
 	{ }
 };
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 222d6a7cbef9..4258c2b344d2 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1732,12 +1732,16 @@ static int vmstat_show(struct seq_file *m, void *arg)
 	return 0;
 }
 
+#ifdef CONFIG_SLUB
 extern int isolate_cnt;
 extern int unexpect_free_cnt;
+#endif
 static void vmstat_stop(struct seq_file *m, void *arg)
 {
+#ifdef CONFIG_SLUB
 	seq_printf(m, "nr_freelist_isolated %d\n", isolate_cnt);
 	seq_printf(m, "nr_unexpected_free %d\n", unexpect_free_cnt);
+#endif
 	kfree(m->private);
 	m->private = NULL;
 }
From: "Martin K. Petersen" <martin.petersen@oracle.com>
hulk inclusion
category: bugfix
bugzilla: 46833
CVE: NA
-----------------------------------------------
Fix: https://gitee.com/src-openeuler/util-linux/issues/I28N07
Origin patch: https://patchwork.kernel.org/project/linux-scsi/patch/20190227041941.1568-1-martin.petersen@oracle.com/
If the partition table is changed online, the partition read-only flag will now be recorded.
Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests. These devices broke with commit 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") because we had no way to distinguish between a user decision to set a block_device read-only and the actual hardware device being write-protected.
Because partitions are dropped and recreated on revalidate we are unable to persist any user-provided policy in hd_struct. Introduce a bitmap in struct gendisk to track the user configuration. This bitmap is updated when BLKROSET is called on a given disk or partition.
A helper function, get_user_ro(), is provided to determine whether the ioctl has forced read-only state for a given block device. This helper is used by set_disk_ro() and add_partition() to ensure that both existing and newly created partitions will get the correct state.
- If BLKROSET sets a whole disk device read-only, all partitions will now end up in a read-only state.
- If BLKROSET sets a given partition read-only, that partition will remain read-only post revalidate.
- Otherwise both the whole disk device and any partitions will reflect the write protect state of the underlying device.
Since nobody knows what "policy" means, rename the field to "read_only" for clarity.
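For context, the user-visible knob involved is the standard BLKROSET ioctl. A small illustrative user-space program (the device path is just an example):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>		/* BLKROSET, BLKROGET */

int main(void)
{
	int ro = 1, check = 0;
	int fd = open("/dev/sdb1", O_RDONLY);	/* example device */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Mark this partition read-only; with this patch the setting is
	 * remembered in the disk's user_ro_bitmap and survives a
	 * partition-table re-read instead of being lost. */
	if (ioctl(fd, BLKROSET, &ro) < 0)
		perror("BLKROSET");
	if (ioctl(fd, BLKROGET, &check) == 0)
		printf("read-only: %d\n", check);
	close(fd);
	return 0;
}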
Cc: Jeremy Cline <jeremy@jcline.org>
Cc: Oleksii Kurochko <olkuroch@cisco.com>
Cc: stable@vger.kernel.org # v4.16+
Reported-by: Oleksii Kurochko <olkuroch@cisco.com>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201221
Fixes: 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition")
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 block/blk-core.c          |  2 +-
 block/genhd.c             | 34 ++++++++++++++++++++++++----------
 block/ioctl.c             |  4 ++++
 block/partition-generic.c |  7 +++++--
 include/linux/genhd.h     | 11 +++++++----
 5 files changed, 41 insertions(+), 17 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index ffbe326c70b9..41c40a6acdca 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2166,7 +2166,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
 {
 	const int op = bio_op(bio);
 
-	if (part->policy && op_is_write(op)) {
+	if (part->read_only && op_is_write(op)) {
 		char b[BDEVNAME_SIZE];
 
 		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
diff --git a/block/genhd.c b/block/genhd.c
index d8a9c901eaef..d9ec6bb4f880 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1543,26 +1543,40 @@ static void set_disk_ro_uevent(struct gendisk *gd, int ro)
 	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
 }
 
-void set_device_ro(struct block_device *bdev, int flag)
+void set_device_ro(struct block_device *bdev, bool state)
 {
-	bdev->bd_part->policy = flag;
+	bdev->bd_part->read_only = state;
 }
 
 EXPORT_SYMBOL(set_device_ro);
 
-void set_disk_ro(struct gendisk *disk, int flag)
+bool get_user_ro(struct gendisk *disk, unsigned int partno)
+{
+	/* Is the user read-only bit set for the whole disk device? */
+	if (test_bit(0, disk->user_ro_bitmap))
+		return true;
+
+	/* Is the user read-only bit set for this particular partition? */
+	if (test_bit(partno, disk->user_ro_bitmap))
+		return true;
+
+	return false;
+}
+
+void set_disk_ro(struct gendisk *disk, bool state)
 {
 	struct disk_part_iter piter;
 	struct hd_struct *part;
 
-	if (disk->part0.policy != flag) {
-		set_disk_ro_uevent(disk, flag);
-		disk->part0.policy = flag;
-	}
+	if (disk->part0.read_only != state)
+		set_disk_ro_uevent(disk, state);
 
-	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
+	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY_PART0);
 	while ((part = disk_part_iter_next(&piter)))
-		part->policy = flag;
+		if (get_user_ro(disk, part->partno))
+			part->read_only = true;
+		else
+			part->read_only = state;
 	disk_part_iter_exit(&piter);
 }
 
@@ -1572,7 +1586,7 @@ int bdev_read_only(struct block_device *bdev)
 {
 	if (!bdev)
 		return 0;
-	return bdev->bd_part->policy;
+	return bdev->bd_part->read_only;
 }
 
 EXPORT_SYMBOL(bdev_read_only);
diff --git a/block/ioctl.c b/block/ioctl.c
index 5a6157b3735a..899ffd50a7c6 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -455,6 +455,10 @@ static int blkdev_roset(struct block_device *bdev, fmode_t mode,
 		return ret;
 	if (get_user(n, (int __user *)arg))
 		return -EFAULT;
+	if (n)
+		set_bit(bdev->bd_partno, bdev->bd_disk->user_ro_bitmap);
+	else
+		clear_bit(bdev->bd_partno, bdev->bd_disk->user_ro_bitmap);
 	set_device_ro(bdev, n);
 	return 0;
 }
diff --git a/block/partition-generic.c b/block/partition-generic.c
index d86d794d28e1..63b82df5bbb4 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -98,7 +98,7 @@ static ssize_t part_ro_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
-	return sprintf(buf, "%d\n", p->policy ? 1 : 0);
+	return sprintf(buf, "%u\n", p->read_only ? 1 : 0);
 }
 
 static ssize_t part_alignment_offset_show(struct device *dev,
@@ -352,7 +352,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 		queue_limit_discard_alignment(&disk->queue->limits, start);
 	p->nr_sects = len;
 	p->partno = partno;
-	p->policy = get_disk_ro(disk);
+	if (get_user_ro(disk, partno))
+		p->read_only = true;
+	else
+		p->read_only = get_disk_ro(disk);
 	p->disk = disk;
 
 	if (info) {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 666b23a88c6f..6a6da28ac0b9 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -116,7 +116,8 @@ struct hd_struct {
 	unsigned int discard_alignment;
 	struct device __dev;
 	struct kobject *holder_dir;
-	int policy, partno;
+	bool read_only;
+	int partno;
 	struct partition_meta_info *info;
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	int make_it_fail;
@@ -202,6 +203,7 @@ struct gendisk {
 	 */
 	struct disk_part_tbl __rcu *part_tbl;
 	struct hd_struct part0;
+	DECLARE_BITMAP(user_ro_bitmap, DISK_MAX_PARTS);
 
 	const struct block_device_operations *fops;
 	struct request_queue *queue;
@@ -440,12 +442,13 @@ extern void del_gendisk(struct gendisk *gp);
 extern struct gendisk *get_gendisk(dev_t dev, int *partno);
 extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
 
-extern void set_device_ro(struct block_device *bdev, int flag);
-extern void set_disk_ro(struct gendisk *disk, int flag);
+extern void set_device_ro(struct block_device *bdev, bool state);
+extern void set_disk_ro(struct gendisk *disk, bool state);
+extern bool get_user_ro(struct gendisk *disk, unsigned int partno);
 
 static inline int get_disk_ro(struct gendisk *disk)
 {
-	return disk->part0.policy;
+	return disk->part0.read_only;
 }
 
 extern void disk_block_events(struct gendisk *disk);
From: Ye Bin <yebin10@huawei.com>
hulk inclusion
category: bugfix
bugzilla: 46833
CVE: NA
-----------------------------------------------
Fixes: ("scsi: sd: block: Fix regressions in read-only block device handling")
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 block/partition-generic.c | 1 +
 1 file changed, 1 insertion(+)
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 63b82df5bbb4..b27ed20d3db4 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -286,6 +286,7 @@ void delete_partition(struct gendisk *disk, int partno)
 	if (!part)
 		return;
 
+	clear_bit(partno, disk->user_ro_bitmap);
 	get_device(disk_to_dev(disk));
 	rcu_assign_pointer(ptbl->part[partno], NULL);
From: "Martin K. Petersen" <martin.petersen@oracle.com>
hulk inclusion
category: bugfix
bugzilla: 46833
CVE: NA
-----------------------------------------------
Fixes: ("scsi: sd: block: Fix regressions in read-only block device handling")
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 block/blk-core.c          |  2 +-
 block/genhd.c             | 33 +++++++++++++++++++++------------
 block/partition-generic.c |  6 +++---
 include/linux/genhd.h     | 16 +++++++++-------
 4 files changed, 34 insertions(+), 23 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 41c40a6acdca..ffbe326c70b9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2166,7 +2166,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
 {
 	const int op = bio_op(bio);
 
-	if (part->read_only && op_is_write(op)) {
+	if (part->policy && op_is_write(op)) {
 		char b[BDEVNAME_SIZE];
 
 		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
diff --git a/block/genhd.c b/block/genhd.c
index d9ec6bb4f880..e109a0702968 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1307,6 +1307,7 @@ static void disk_release(struct device *dev)
 	hd_free_part(&disk->part0);
 	if (disk->queue)
 		blk_put_queue(disk->queue);
+	kfree(disk->user_ro_bitmap);
 	kfree(disk);
 }
 struct class block_class = {
@@ -1481,6 +1482,14 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
 			return NULL;
 		}
 
+		disk->user_ro_bitmap = kzalloc_node(
+				BITS_TO_LONGS(DISK_MAX_PARTS) * sizeof(long),
+				GFP_KERNEL, node_id);
+		if (!disk->user_ro_bitmap) {
+			hd_free_part(&disk->part0);
+			kfree(disk);
+			return NULL;
+		}
 		disk->minors = minors;
 		rand_initialize_disk(disk);
 		disk_to_dev(disk)->class = &block_class;
@@ -1543,40 +1552,40 @@ static void set_disk_ro_uevent(struct gendisk *gd, int ro)
 	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
 }
 
-void set_device_ro(struct block_device *bdev, bool state)
+void set_device_ro(struct block_device *bdev, int flag)
 {
-	bdev->bd_part->read_only = state;
+	bdev->bd_part->policy = flag;
 }
 
 EXPORT_SYMBOL(set_device_ro);
 
-bool get_user_ro(struct gendisk *disk, unsigned int partno)
+int get_user_ro(struct gendisk *disk, unsigned int partno)
 {
 	/* Is the user read-only bit set for the whole disk device? */
 	if (test_bit(0, disk->user_ro_bitmap))
-		return true;
+		return 1;
 
 	/* Is the user read-only bit set for this particular partition? */
 	if (test_bit(partno, disk->user_ro_bitmap))
-		return true;
+		return 1;
 
-	return false;
+	return 0;
 }
 
-void set_disk_ro(struct gendisk *disk, bool state)
+void set_disk_ro(struct gendisk *disk, int flag)
 {
 	struct disk_part_iter piter;
 	struct hd_struct *part;
 
-	if (disk->part0.read_only != state)
-		set_disk_ro_uevent(disk, state);
+	if (disk->part0.policy != flag)
+		set_disk_ro_uevent(disk, flag);
 
 	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY_PART0);
 	while ((part = disk_part_iter_next(&piter)))
 		if (get_user_ro(disk, part->partno))
-			part->read_only = true;
+			part->policy = 1;
 		else
-			part->read_only = state;
+			part->policy = flag;
 	disk_part_iter_exit(&piter);
 }
 
@@ -1586,7 +1595,7 @@ int bdev_read_only(struct block_device *bdev)
 {
 	if (!bdev)
 		return 0;
-	return bdev->bd_part->read_only;
+	return bdev->bd_part->policy;
 }
 
 EXPORT_SYMBOL(bdev_read_only);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index b27ed20d3db4..a39c311aec38 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -98,7 +98,7 @@ static ssize_t part_ro_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
-	return sprintf(buf, "%u\n", p->read_only ? 1 : 0);
+	return sprintf(buf, "%u\n", p->policy ? 1 : 0);
 }
 
 static ssize_t part_alignment_offset_show(struct device *dev,
@@ -354,9 +354,9 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	p->nr_sects = len;
 	p->partno = partno;
 	if (get_user_ro(disk, partno))
-		p->read_only = true;
+		p->policy = 1;
 	else
-		p->read_only = get_disk_ro(disk);
+		p->policy = get_disk_ro(disk);
 	p->disk = disk;
 
 	if (info) {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6a6da28ac0b9..404567f13cda 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -116,8 +116,7 @@ struct hd_struct {
 	unsigned int discard_alignment;
 	struct device __dev;
 	struct kobject *holder_dir;
-	bool read_only;
-	int partno;
+	int policy, partno;
 	struct partition_meta_info *info;
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	int make_it_fail;
@@ -203,7 +202,6 @@ struct gendisk {
 	 */
 	struct disk_part_tbl __rcu *part_tbl;
 	struct hd_struct part0;
-	DECLARE_BITMAP(user_ro_bitmap, DISK_MAX_PARTS);
 
 	const struct block_device_operations *fops;
 	struct request_queue *queue;
@@ -223,7 +221,11 @@ struct gendisk {
 	struct badblocks *bb;
 	struct lockdep_map lockdep_map;
 
+#ifndef __GENKSYMS__
+	unsigned long *user_ro_bitmap;
+#else
 	KABI_RESERVE(1)
+#endif
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
@@ -442,13 +444,13 @@ extern void del_gendisk(struct gendisk *gp);
 extern struct gendisk *get_gendisk(dev_t dev, int *partno);
 extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
 
-extern void set_device_ro(struct block_device *bdev, bool state);
-extern void set_disk_ro(struct gendisk *disk, bool state);
-extern bool get_user_ro(struct gendisk *disk, unsigned int partno);
+extern void set_device_ro(struct block_device *bdev, int flag);
+extern void set_disk_ro(struct gendisk *disk, int flag);
+extern int get_user_ro(struct gendisk *disk, unsigned int partno);
 
 static inline int get_disk_ro(struct gendisk *disk)
 {
-	return disk->part0.read_only;
+	return disk->part0.policy;
 }
 
 extern void disk_block_events(struct gendisk *disk);
From: Ye Bin <yebin10@huawei.com>
hulk inclusion
category: bugfix
bugzilla: 46833
CVE: NA
-----------------------------------------------
Commit c4f20b042b70 may lead to the user-set read-only flag being cleared when the partition table is re-read.
This reverts commit c4f20b042b7060f505134dcb1fc283893ec41e2a.
Fixes: c4f20b042b70 ("scsi: sd: block: Fix read-only flag residuals when partition table change")
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 block/partition-generic.c | 1 -
 1 file changed, 1 deletion(-)
diff --git a/block/partition-generic.c b/block/partition-generic.c
index a39c311aec38..739c0cc5fd22 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -286,7 +286,6 @@ void delete_partition(struct gendisk *disk, int partno)
 	if (!part)
 		return;
 
-	clear_bit(partno, disk->user_ro_bitmap);
 	get_device(disk_to_dev(disk));
 	rcu_assign_pointer(ptbl->part[partno], NULL);
From: Ming Lei <ming.lei@redhat.com>
mainline inclusion
from mainline-v5.8-rc7
commit 3f0dcfbcd2e162fc0a11c1f59b7acd42ee45f126
category: bugfix
bugzilla: 47875
CVE: NA
-------------------------------------------------
I/O requests may be held in the scheduler queue because of resource contention. The starvation scenario was handled properly in the regular completion path, but we failed to account for it during I/O submission. This led to the hang captured below. Make sure we run the queue when resource contention is encountered in the submission path.
[   39.054963] scsi 13:0:0:0: rejecting I/O to dead device
[   39.058700] scsi 13:0:0:0: rejecting I/O to dead device
[   39.087855] sd 13:0:0:1: [sdd] Synchronizing SCSI cache
[   39.088909] scsi 13:0:0:1: rejecting I/O to dead device
[   39.095351] scsi 13:0:0:1: rejecting I/O to dead device
[   39.096962] scsi 13:0:0:1: rejecting I/O to dead device
[  247.021859] INFO: task scsi-stress-rem:813 blocked for more than 122 seconds.
[  247.023258]       Not tainted 5.8.0-rc2 #8
[  247.024069] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  247.025331] scsi-stress-rem D    0   813    802 0x00004000
[  247.025334] Call Trace:
[  247.025354]  __schedule+0x504/0x55f
[  247.027987]  schedule+0x72/0xa8
[  247.027991]  blk_mq_freeze_queue_wait+0x63/0x8c
[  247.027994]  ? do_wait_intr_irq+0x7a/0x7a
[  247.027996]  blk_cleanup_queue+0x4b/0xc9
[  247.028000]  __scsi_remove_device+0xf6/0x14e
[  247.028002]  scsi_remove_device+0x21/0x2b
[  247.029037]  sdev_store_delete+0x58/0x7c
[  247.029041]  kernfs_fop_write+0x10d/0x14f
[  247.031281]  vfs_write+0xa2/0xdf
[  247.032670]  ksys_write+0x6b/0xb3
[  247.032673]  do_syscall_64+0x56/0x82
[  247.034053]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  247.034059] RIP: 0033:0x7f69f39e9008
[  247.036330] Code: Bad RIP value.
[  247.036331] RSP: 002b:00007ffdd8116498 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[  247.037613] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f69f39e9008
[  247.039714] RDX: 0000000000000002 RSI: 000055cde92a0ab0 RDI: 0000000000000001
[  247.039715] RBP: 000055cde92a0ab0 R08: 000000000000000a R09: 00007f69f3a79e80
[  247.039716] R10: 000000000000000a R11: 0000000000000246 R12: 00007f69f3abb780
[  247.039717] R13: 0000000000000002 R14: 00007f69f3ab6740 R15: 0000000000000002
Link: https://lore.kernel.org/r/20200720025435.812030-1-ming.lei@redhat.com
Cc: linux-block@vger.kernel.org
Cc: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Conflict:
	drivers/scsi/scsi_lib.c
	[Yufen: compatible with commit 44ea147b2756 ("SCSI: fix queue cleanup
	 race before scsi_requeue_run_queue is done")]
Signed-off-by: Yufen Yu <yuyufen@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/scsi/scsi_lib.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index f5ee3b714b2a..0d5255482ca1 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -685,6 +685,21 @@ static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
 	cmd->request->next_rq->special = NULL;
 }
 
+static void scsi_run_queue_async(struct scsi_device *sdev)
+{
+	struct request_queue *q = sdev->request_queue;
+
+	percpu_ref_get(&q->q_usage_counter);
+	if (scsi_target(sdev)->single_lun ||
+	    !list_empty(&sdev->host->starved_list)) {
+		if (!kblockd_schedule_work(&sdev->requeue_work))
+			percpu_ref_put(&q->q_usage_counter);
+	} else {
+		blk_mq_run_hw_queues(q, true);
+		percpu_ref_put(&q->q_usage_counter);
+	}
+}
+
 /* Returns false when no more bytes to process, true if there are more */
 static bool scsi_end_request(struct request *req, blk_status_t error,
 		unsigned int bytes, unsigned int bidi_bytes)
@@ -735,14 +750,9 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
 
 		__blk_mq_end_request(req, error);
 
-		if (scsi_target(sdev)->single_lun ||
-		    !list_empty(&sdev->host->starved_list)) {
-			if (!kblockd_schedule_work(&sdev->requeue_work))
-				percpu_ref_put(&q->q_usage_counter);
-		} else {
-			blk_mq_run_hw_queues(q, true);
-			percpu_ref_put(&q->q_usage_counter);
-		}
+		scsi_run_queue_async(sdev);
+
+		percpu_ref_put(&q->q_usage_counter);
 	} else {
 		unsigned long flags;
@@ -2206,6 +2216,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 		 */
 		if (req->rq_flags & RQF_DONTPREP)
 			scsi_mq_uninit_cmd(cmd);
+		scsi_run_queue_async(sdev);
 		break;
 	}
 	return ret;
From: Ming Lei <ming.lei@redhat.com>
mainline inclusion
from mainline-v5.10-rc1
commit ed5dd6a67d5eac5fb8873697b55dc1699752a9f3
category: bugfix
bugzilla: 47875
CVE: NA
-------------------------------------------------
The request queue is currently run unconditionally in scsi_end_request() if both target queue and host queue are ready.
Recently Long Li reported that the cost of a queue run can be very heavy at high queue depth. Improve this situation by only running the request queue when this LUN is busy.
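A stripped-down user-space model of the handshake the diff below implements (hypothetical names; the kernel code uses atomic_cmpxchg() together with memory barriers): the submission side bumps a counter when it fails to get budget, and the completion side re-runs the queue only if it wins the race to clear that counter.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int restarts;	/* models sdev->restarts for one device */

static void run_hw_queues(void)
{
	puts("re-running hardware queues");	/* stand-in for blk_mq_run_hw_queues() */
}

/* Submission path: no budget available, record that a re-run is owed. */
static void on_budget_contention(void)
{
	atomic_fetch_add(&restarts, 1);
	/* The kernel patch adds smp_mb__after_atomic() here so the increment
	 * is ordered before the subsequent read of device_busy. */
}

/* Completion path: re-run the queue only if we win the race to clear it. */
static void on_request_completion(void)
{
	int old = atomic_load(&restarts);

	/* If another completion swapped restarts to zero first, it owns the
	 * re-run; if new contention bumped the counter, the CAS fails and a
	 * later completion will observe the fresh value. */
	if (old && atomic_compare_exchange_strong(&restarts, &old, 0))
		run_hw_queues();
}

int main(void)
{
	on_budget_contention();		/* submission failed to get budget */
	on_request_completion();	/* a completion notices and re-runs */
	return 0;
}

The point of the cmpxchg is that a queue run happens at most once per recorded contention, instead of unconditionally on every completion.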
Link: https://lore.kernel.org/r/20200910075056.36509-1-ming.lei@redhat.com
Reported-by: Long Li <longli@microsoft.com>
Tested-by: Long Li <longli@microsoft.com>
Tested-by: Kashyap Desai <kashyap.desai@broadcom.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Conflict:
	drivers/scsi/scsi_lib.c
	include/scsi/scsi_device.h
Signed-off-by: Yufen Yu <yuyufen@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/scsi/scsi_lib.c    | 34 +++++++++++++++++++++++++++++++++-
 include/scsi/scsi_device.h |  1 +
 2 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0d5255482ca1..1bca98745b00 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -695,7 +695,23 @@ static void scsi_run_queue_async(struct scsi_device *sdev)
 		if (!kblockd_schedule_work(&sdev->requeue_work))
 			percpu_ref_put(&q->q_usage_counter);
 	} else {
-		blk_mq_run_hw_queues(q, true);
+		/*
+		 * smp_mb() present in sbitmap_queue_clear() or implied in
+		 * .end_io is for ordering writing .device_busy in
+		 * scsi_device_unbusy() and reading sdev->restarts.
+		 */
+		int old = atomic_read(&sdev->restarts);
+
+		/*
+		 * ->restarts has to be kept as non-zero if new budget
+		 * contention occurs.
+		 *
+		 * No need to run queue when either another re-run
+		 * queue wins in updating ->restarts or a new budget
+		 * contention occurs.
+		 */
+		if (old && atomic_cmpxchg(&sdev->restarts, old, 0) == old)
+			blk_mq_run_hw_queues(sdev->request_queue, true);
 		percpu_ref_put(&q->q_usage_counter);
 	}
 }
@@ -2136,7 +2152,23 @@ static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx)
 
 out_put_device:
 	put_device(&sdev->sdev_gendev);
+	atomic_inc(&sdev->restarts);
+
+	/*
+	 * Orders atomic_inc(&sdev->restarts) and atomic_read(&sdev->device_busy).
+	 * .restarts must be incremented before .device_busy is read because the
+	 * code in scsi_run_queue_async() depends on the order of these operations.
+	 */
+	smp_mb__after_atomic();
 out:
+	/*
+	 * If all in-flight requests originated from this LUN are completed
+	 * before reading .device_busy, sdev->device_busy will be observed as
+	 * zero, then blk_mq_delay_run_hw_queues() will dispatch this request
+	 * soon. Otherwise, completion of one of these requests will observe
+	 * the .restarts flag, and the request queue will be run for handling
+	 * this request, see scsi_end_request().
+	 */
 	if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev))
 		blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
 	return false;
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 550739a5ea96..6e2edd66c598 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -110,6 +110,7 @@ struct scsi_device {
 	atomic_t device_busy;		/* commands actually active on LLDD */
 	atomic_t device_blocked;	/* Device returned QUEUE_FULL. */
 
+	atomic_t restarts;
 	spinlock_t list_lock;
 	struct list_head cmd_list;	/* queue of in use SCSI Command structures */
 	struct list_head starved_entry;
From: Yufen Yu <yuyufen@huawei.com>
hulk inclusion
category: bugfix
bugzilla: 47875
CVE: NA
-------------------------------------------------
Use the reserved room in struct scsi_device to store the newly added member.
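For illustration, a simplified sketch of the reserved-slot pattern (the KABI_RESERVE macro below is a stand-in for the real one): __GENKSYMS__ is defined while the symbol-version tool runs, so the checksum is computed against the original layout, while real builds see the new member occupying a reserved slot of the same size.

/* Simplified stand-in for the kernel's KABI reserve macro: each slot is
 * one pointer-sized placeholder kept for future, ABI-compatible growth. */
#define KABI_RESERVE(n)	unsigned long kabi_reserved##n;

struct example_device {
	int busy;			/* existing, KABI-frozen members */

#ifndef __GENKSYMS__
	/* The new member replaces a reserved slot; everything after it
	 * keeps its old offset, so the ABI checksum stays valid. */
	unsigned long restarts_slot;
#else
	KABI_RESERVE(1)
#endif
	KABI_RESERVE(2)
};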
Signed-off-by: Yufen Yu <yuyufen@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/scsi/scsi_device.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 6e2edd66c598..f0b011ca39e1 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -110,7 +110,6 @@ struct scsi_device {
 	atomic_t device_busy;		/* commands actually active on LLDD */
 	atomic_t device_blocked;	/* Device returned QUEUE_FULL. */
 
-	atomic_t restarts;
 	spinlock_t list_lock;
 	struct list_head cmd_list;	/* queue of in use SCSI Command structures */
 	struct list_head starved_entry;
@@ -231,10 +230,11 @@ struct scsi_device {
 	struct task_struct *quiesced_by;
 #ifndef __GENKSYMS__
 	unsigned long offline_already;	/* Device offline message logged */
+	atomic_t restarts;
 #else
 	KABI_RESERVE(1)
-#endif
 	KABI_RESERVE(2)
+#endif
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
 	KABI_RESERVE(5)
From: Ye Bin <yebin10@huawei.com>
hulk inclusion
category: bugfix
bugzilla: 49978
CVE: NA
-----------------------------------------------
This reverts commit 544058bd9aa143489cd480c5c076caf76c33b6c1.
We got the following error:

2021/02/26 10:15:49 parsed 1 programs
2021/02/26 10:15:49 executed programs: 0
Message from syslogd@localhost at Feb 26 10:15:52 ...
kernel:[  710.135641] page:ffff7e000309e600 count:-1 mapcount:0 mapping:0000000000000000 index:0x0
Message from syslogd@localhost at Feb 26 10:15:52 ...
kernel:[  710.136201] flags: 0xffffe0000000000()
sg_remove_scat() checks schp->k_use_sg and then frees the pages. But in sg_build_indirect(), when rem_sz > 0, the pages were freed without clearing schp->k_use_sg or setting schp->pages[i] to NULL. This leads to the same pages being freed again in sg_remove_scat().
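A minimal sketch of the bug pattern (hypothetical, heavily simplified from the sg logic): an error path frees the buffers but leaves the bookkeeping intact, and the later teardown path trusts the bookkeeping and frees them again.

#include <stdlib.h>

struct scatter_demo {
	int k_use_sg;		/* number of allocated pages */
	void *pages[8];
};

/* Buggy error path: frees the pages but leaves k_use_sg/pages[] set. */
static int build_error_path(struct scatter_demo *s)
{
	for (int i = 0; i < s->k_use_sg; i++)
		free(s->pages[i]);
	/* Missing: s->k_use_sg = 0; or s->pages[i] = NULL; */
	return -1;
}

/* Teardown path: trusts the bookkeeping, so it frees the pages again. */
static void remove_path(struct scatter_demo *s)
{
	for (int i = 0; i < s->k_use_sg; i++)
		free(s->pages[i]);	/* double free after the error path */
}

int main(void)
{
	struct scatter_demo s = { .k_use_sg = 2 };

	s.pages[0] = malloc(16);
	s.pages[1] = malloc(16);
	build_error_path(&s);
	/* Calling remove_path(&s) here would free s.pages[0..1] a second
	 * time - the crash the revert below avoids by leaving all freeing
	 * to the single teardown path. */
	return 0;
}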
Fixes: 544058bd9aa1 ("scsi: sg: fix memory leak in sg_build_indirect")
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/scsi/sg.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 749faafbc977..10da329fa53f 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1942,12 +1942,8 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
 			 k, rem_sz));
 
 	schp->bufflen = blk_size;
-	if (rem_sz > 0) {	/* must have failed */
-		for (i = 0; i < k; i++)
-			__free_pages(schp->pages[i], order);
-
+	if (rem_sz > 0)		/* must have failed */
 		return -ENOMEM;
-	}
 	return 0;
 out:
 	for (i = 0; i < k; i++)