From: Wang Yufen wangyufen@huawei.com
mainline inclusion from mainline-v6.0-rc1 commit d8616ee2affcff37c5d315310da557a694a3303d category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I6BRTY CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
During TCP sockmap redirect pressure test, the following warning is triggered:
WARNING: CPU: 3 PID: 2145 at net/core/stream.c:205 sk_stream_kill_queues+0xbc/0xd0 CPU: 3 PID: 2145 Comm: iperf Kdump: loaded Tainted: G W 5.10.0+ #9 Call Trace: inet_csk_destroy_sock+0x55/0x110 inet_csk_listen_stop+0xbb/0x380 tcp_close+0x41b/0x480 inet_release+0x42/0x80 __sock_release+0x3d/0xa0 sock_close+0x11/0x20 __fput+0x9d/0x240 task_work_run+0x62/0x90 exit_to_user_mode_prepare+0x110/0x120 syscall_exit_to_user_mode+0x27/0x190 entry_SYSCALL_64_after_hwframe+0x44/0xa9
The reason we observed is that:
When the listener is closing, a connection may have completed the three-way handshake but not accepted, and the client has sent some packets. The child sks in accept queue release by inet_child_forget()->inet_csk_destroy_sock(), but psocks of child sks have not released.
To fix, add sock_map_destroy to release psocks.
Signed-off-by: Wang Yufen wangyufen@huawei.com Signed-off-by: Daniel Borkmann daniel@iogearbox.net Signed-off-by: Andrii Nakryiko andrii@kernel.org Acked-by: Jakub Sitnicki jakub@cloudflare.com Acked-by: John Fastabend john.fastabend@gmail.com Link: https://lore.kernel.org/bpf/20220524075311.649153-1-wangyufen@huawei.com Signed-off-by: Liu Jian liujian56@huawei.com
Conflicts: include/linux/bpf.h Reviewed-by: Wang Yufen wangyufen@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Jialin Zhang zhangjialin11@huawei.com --- include/linux/bpf.h | 1 + include/linux/skmsg.h | 1 + net/core/skmsg.c | 1 + net/core/sock_map.c | 23 +++++++++++++++++++++++ net/ipv4/tcp_bpf.c | 1 + 5 files changed, 27 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8d95f4c66275..79fb7c1be8fd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1845,6 +1845,7 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr); void bpf_map_offload_map_free(struct bpf_map *map); +void sock_map_destroy(struct sock *sk); #else static inline int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 1899a1c8421c..a83885c5bb86 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -105,6 +105,7 @@ struct sk_psock { spinlock_t link_lock; refcount_t refcnt; void (*saved_unhash)(struct sock *sk); + void (*saved_destroy)(struct sock *sk); void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); struct proto *sk_proto; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 622d93d56953..925863fab5bd 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -616,6 +616,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) psock->eval = __SK_NONE; psock->sk_proto = prot; psock->saved_unhash = prot->unhash; + psock->saved_destroy = prot->destroy; psock->saved_close = prot->close; psock->saved_write_space = sk->sk_write_space;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 28e518da4bcb..85df06298c98 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1560,6 +1560,29 @@ void sock_map_unhash(struct sock *sk) saved_unhash(sk); }
+void sock_map_destroy(struct sock *sk) +{ + void (*saved_destroy)(struct sock *sk); + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock_get(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + if (sk->sk_prot->destroy) + sk->sk_prot->destroy(sk); + return; + } + + saved_destroy = psock->saved_destroy; + sock_map_remove_links(sk, psock); + rcu_read_unlock(); + sk_psock_stop(psock, true); + sk_psock_put(sk, psock); + saved_destroy(sk); +} +EXPORT_SYMBOL_GPL(sock_map_destroy); + void sock_map_close(struct sock *sk, long timeout) { void (*saved_close)(struct sock *sk, long timeout); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index afeaf35194de..1a8f129fb309 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -577,6 +577,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], struct proto *base) { prot[TCP_BPF_BASE] = *base; + prot[TCP_BPF_BASE].destroy = sock_map_destroy; prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read;
From: John Fastabend john.fastabend@gmail.com
mainline inclusion from mainline-v6.0-rc1 commit 697fb80a53642be624f5121b6ca9d66769c180e0 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I6BRTY CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
syzbot reproduced the bug ...
BUG: sleeping function called from invalid context at kernel/workqueue.c:3010
... with the following stack trace fragment ...
start_flush_work kernel/workqueue.c:3010 [inline] __flush_work+0x109/0xb10 kernel/workqueue.c:3074 __cancel_work_timer+0x3f9/0x570 kernel/workqueue.c:3162 sk_psock_stop+0x4cb/0x630 net/core/skmsg.c:802 sock_map_destroy+0x333/0x760 net/core/sock_map.c:1581 inet_csk_destroy_sock+0x196/0x440 net/ipv4/inet_connection_sock.c:1130 __tcp_close+0xd5b/0x12b0 net/ipv4/tcp.c:2897 tcp_close+0x29/0xc0 net/ipv4/tcp.c:2909
... introduced by d8616ee2affc. Do a quick trace of the code path and the bug is obvious:
inet_csk_destroy_sock(sk) sk_prot->destroy(sk); <--- sock_map_destroy sk_psock_stop(, true); <--- true so cancel workqueue cancel_work_sync() <--- splat, because *_bh_disable()
We can not call cancel_work_sync() from inside destroy path. So mark the sk_psock_stop call to skip this cancel_work_sync(). This will avoid the BUG, but means we may run sk_psock_backlog after or during the destroy op. We zapped the ingress_skb queue in sk_psock_stop (safe to do with local_bh_disable) so its empty and the sk_psock_backlog work item will not find any pkts to process here. However, because we are not going to wait for it or clear its ->state its possible it kicks off or is already running. This should be 'safe' up until psock drops its refcnt to psock->sk. The sock_put() that drops this reference is only done at psock destroy time from sk_psock_destroy(). This is done through workqueue when sk_psock_drop() is called on psock refnt reaches 0. And importantly sk_psock_destroy() does a cancel_work_sync(). So trivial fix works.
I've had hit or miss luck reproducing this caught it once or twice with the provided reproducer when running with many runners. However, syzkaller is very good at reproducing so relying on syzkaller to verify fix.
Fixes: d8616ee2affc ("bpf, sockmap: Fix sk->sk_forward_alloc warn_on in sk_stream_kill_queues") Reported-by: syzbot+140186ceba0c496183bc@syzkaller.appspotmail.com Suggested-by: Hillf Danton hdanton@sina.com Signed-off-by: John Fastabend john.fastabend@gmail.com Signed-off-by: Daniel Borkmann daniel@iogearbox.net Cc: Wang Yufen wangyufen@huawei.com Link: https://lore.kernel.org/bpf/20220628035803.317876-1-john.fastabend@gmail.com Signed-off-by: Liu Jian liujian56@huawei.com Reviewed-by: Wang Yufen wangyufen@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Jialin Zhang zhangjialin11@huawei.com --- net/core/sock_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 85df06298c98..50fb43905c87 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1577,7 +1577,7 @@ void sock_map_destroy(struct sock *sk) saved_destroy = psock->saved_destroy; sock_map_remove_links(sk, psock); rcu_read_unlock(); - sk_psock_stop(psock, true); + sk_psock_stop(psock, false); sk_psock_put(sk, psock); saved_destroy(sk); }
From: Thiago Rafael Becker trbecker@gmail.com
mainline inclusion from mainline-v5.16-rc6 commit a31080899d5fdafcccf7f39dd214a814a2c82626 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6D6RL CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
mount.cifs can pass a device with multiple delimiters in it. This will cause rename(2) to fail with ENOENT.
V2: - Make sanitize_path more readable. - Fix multiple delimiters between UNC and prepath. - Avoid a memory leak if a bad user starts putting a lot of delimiters in the path on purpose.
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=2031200 Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Cc: stable@vger.kernel.org # 5.11+ Acked-by: Ronnie Sahlberg lsahlber@redhat.com Signed-off-by: Thiago Rafael Becker trbecker@gmail.com Signed-off-by: Steve French stfrench@microsoft.com
conflicts: fs/cifs/fs_context.c
Signed-off-by: ZhaoLong Wang wangzhaolong1@huawei.com Reviewed-by: Zhang Xiaoxu zhangxiaoxu5@huawei.com Signed-off-by: Jialin Zhang zhangjialin11@huawei.com --- fs/cifs/connect.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d1c3086d7ddd..b91a6e47f37d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1328,6 +1328,42 @@ static int get_option_gid(substring_t args[], kgid_t *result) return 0; }
+/* + * Remove duplicate path delimiters. Windows is supposed to do that + * but there are some bugs that prevent rename from working if there are + * multiple delimiters. + * + * Returns a sanitized duplicate of @path. The caller is responsible for + * cleaning up the original. + */ +#define IS_DELIM(c) ((c) == '/' || (c) == '\') +static char *sanitize_path(char *path) +{ + char *cursor1 = path, *cursor2 = path; + + /* skip all prepended delimiters */ + while (IS_DELIM(*cursor1)) + cursor1++; + + /* copy the first letter */ + *cursor2 = *cursor1; + + /* copy the remainder... */ + while (*(cursor1++)) { + /* ... skipping all duplicated delimiters */ + if (IS_DELIM(*cursor1) && IS_DELIM(*cursor2)) + continue; + *(++cursor2) = *cursor1; + } + + /* if the last character is a delimiter, skip it */ + if (IS_DELIM(*(cursor2 - 1))) + cursor2--; + + *(cursor2) = '\0'; + return kstrdup(path, GFP_KERNEL); +} + /* * Parse a devname into substrings and populate the vol->UNC and vol->prepath * fields with the result. Returns 0 on success and an error otherwise. @@ -1376,7 +1412,7 @@ cifs_parse_devname(const char *devname, struct smb_vol *vol) if (!*pos) return 0;
- vol->prepath = kstrdup(pos, GFP_KERNEL); + vol->prepath = sanitize_path(pos); if (!vol->prepath) return -ENOMEM;
From: Yu Kuai yukuai3@huawei.com
mainline inclusion from mainline-v6.2-rc5 commit 216f764716f34fe68cedc7296ae2043a7727e640 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6DYRK CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
The updating of 'bfqg->ref' should be protected by 'bfqd->lock', however, during code review, we found that bfq_pd_free() update 'bfqg->ref' without holding the lock, which is problematic:
1) bfq_pd_free() triggered by removing cgroup is called asynchronously; 2) bfqq will grab bfqg reference, and exit bfqq will drop the reference, which can concurrent with 1).
Unfortunately, 'bfqd->lock' can't be held here because 'bfqd' might already be freed in bfq_pd_free(). Fix the problem by using atomic refcount apis.
Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20230103084755.1256479-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe axboe@kernel.dk Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Jialin Zhang zhangjialin11@huawei.com --- block/bfq-cgroup.c | 8 +++----- block/bfq-iosched.h | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index a285711f1b48..06c82d689ef0 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -321,14 +321,12 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
static void bfqg_get(struct bfq_group *bfqg) { - bfqg->ref++; + refcount_inc(&bfqg->ref); }
static void bfqg_put(struct bfq_group *bfqg) { - bfqg->ref--; - - if (bfqg->ref == 0) + if (refcount_dec_and_test(&bfqg->ref)) kfree(bfqg); }
@@ -535,7 +533,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q, }
/* see comments in bfq_bic_update_cgroup for why refcounting */ - bfqg_get(bfqg); + refcount_set(&bfqg->ref, 1); return &bfqg->pd; }
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 2a4a6f44efff..f6d9adfd1fc6 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -900,7 +900,7 @@ struct bfq_group { char blkg_path[128];
/* reference counter (see comments in bfq_bic_update_cgroup) */ - int ref; + refcount_t ref; /* Is bfq_group still online? */ bool online;
From: Zhihao Cheng chengzhihao1@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6D1E3 CVE: NA
--------------------------------
Following process will cause a memleak for copied up znode:
dirty_cow_znode zn = copy_znode(c, znode); err = insert_old_idx(c, zbr->lnum, zbr->offs); if (unlikely(err)) return ERR_PTR(err); // No one refers to zn.
Fetch a reproducer in [Link].
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216705 Fixes: 1e51764a3c2a ("UBIFS: add new flash file system") Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com Signed-off-by: Richard Weinberger richard@nod.at Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Jialin Zhang zhangjialin11@huawei.com --- fs/ubifs/tnc.c | 137 +++++++++++++++++++++++++++++++------------------ 1 file changed, 87 insertions(+), 50 deletions(-)
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 414266d4d1ba..addee3fa0578 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -44,6 +44,33 @@ enum { NOT_ON_MEDIA = 3, };
+static void do_insert_old_idx(struct ubifs_info *c, + struct ubifs_old_idx *old_idx) +{ + struct ubifs_old_idx *o; + struct rb_node **p, *parent = NULL; + + p = &c->old_idx.rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct ubifs_old_idx, rb); + if (old_idx->lnum < o->lnum) + p = &(*p)->rb_left; + else if (old_idx->lnum > o->lnum) + p = &(*p)->rb_right; + else if (old_idx->offs < o->offs) + p = &(*p)->rb_left; + else if (old_idx->offs > o->offs) + p = &(*p)->rb_right; + else { + ubifs_err(c, "old idx added twice!"); + kfree(old_idx); + } + } + rb_link_node(&old_idx->rb, parent, p); + rb_insert_color(&old_idx->rb, &c->old_idx); +} + /** * insert_old_idx - record an index node obsoleted since the last commit start. * @c: UBIFS file-system description object @@ -69,35 +96,15 @@ enum { */ static int insert_old_idx(struct ubifs_info *c, int lnum, int offs) { - struct ubifs_old_idx *old_idx, *o; - struct rb_node **p, *parent = NULL; + struct ubifs_old_idx *old_idx;
old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS); if (unlikely(!old_idx)) return -ENOMEM; old_idx->lnum = lnum; old_idx->offs = offs; + do_insert_old_idx(c, old_idx);
- p = &c->old_idx.rb_node; - while (*p) { - parent = *p; - o = rb_entry(parent, struct ubifs_old_idx, rb); - if (lnum < o->lnum) - p = &(*p)->rb_left; - else if (lnum > o->lnum) - p = &(*p)->rb_right; - else if (offs < o->offs) - p = &(*p)->rb_left; - else if (offs > o->offs) - p = &(*p)->rb_right; - else { - ubifs_err(c, "old idx added twice!"); - kfree(old_idx); - return 0; - } - } - rb_link_node(&old_idx->rb, parent, p); - rb_insert_color(&old_idx->rb, &c->old_idx); return 0; }
@@ -199,23 +206,6 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, __set_bit(DIRTY_ZNODE, &zn->flags); __clear_bit(COW_ZNODE, &zn->flags);
- ubifs_assert(c, !ubifs_zn_obsolete(znode)); - __set_bit(OBSOLETE_ZNODE, &znode->flags); - - if (znode->level != 0) { - int i; - const int n = zn->child_cnt; - - /* The children now have new parent */ - for (i = 0; i < n; i++) { - struct ubifs_zbranch *zbr = &zn->zbranch[i]; - - if (zbr->znode) - zbr->znode->parent = zn; - } - } - - atomic_long_inc(&c->dirty_zn_cnt); return zn; }
@@ -233,6 +223,42 @@ static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt) return ubifs_add_dirt(c, lnum, dirt); }
+/** + * replace_znode - replace old znode with new znode. + * @c: UBIFS file-system description object + * @new_zn: new znode + * @old_zn: old znode + * @zbr: the branch of parent znode + * + * Replace old znode with new znode in TNC. + */ +static void replace_znode(struct ubifs_info *c, struct ubifs_znode *new_zn, + struct ubifs_znode *old_zn, struct ubifs_zbranch *zbr) +{ + ubifs_assert(c, !ubifs_zn_obsolete(old_zn)); + __set_bit(OBSOLETE_ZNODE, &old_zn->flags); + + if (old_zn->level != 0) { + int i; + const int n = new_zn->child_cnt; + + /* The children now have new parent */ + for (i = 0; i < n; i++) { + struct ubifs_zbranch *child = &new_zn->zbranch[i]; + + if (child->znode) + child->znode->parent = new_zn; + } + } + + zbr->znode = new_zn; + zbr->lnum = 0; + zbr->offs = 0; + zbr->len = 0; + + atomic_long_inc(&c->dirty_zn_cnt); +} + /** * dirty_cow_znode - ensure a znode is not being committed. * @c: UBIFS file-system description object @@ -265,21 +291,32 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, return zn;
if (zbr->len) { - err = insert_old_idx(c, zbr->lnum, zbr->offs); - if (unlikely(err)) - return ERR_PTR(err); + struct ubifs_old_idx *old_idx; + + old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS); + if (unlikely(!old_idx)) { + err = -ENOMEM; + goto out; + } + old_idx->lnum = zbr->lnum; + old_idx->offs = zbr->offs; + err = add_idx_dirt(c, zbr->lnum, zbr->len); - } else - err = 0; + if (err) { + kfree(old_idx); + goto out; + }
- zbr->znode = zn; - zbr->lnum = 0; - zbr->offs = 0; - zbr->len = 0; + do_insert_old_idx(c, old_idx); + } + + replace_znode(c, zn, znode, zbr);
- if (unlikely(err)) - return ERR_PTR(err); return zn; + +out: + kfree(zn); + return ERR_PTR(err); }
/**
From: Zhihao Cheng chengzhihao1@huawei.com
maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6D1Y2 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?...
--------------------------------
Dirty znodes will be written on flash in committing process with following states:
process A | znode state ------------------------------------------------------ do_commit | DIRTY_ZNODE ubifs_tnc_start_commit | DIRTY_ZNODE get_znodes_to_commit | DIRTY_ZNODE | COW_ZNODE layout_commit | DIRTY_ZNODE | COW_ZNODE fill_gap | 0 write master | 0 or OBSOLETE_ZNODE
process B | znode state ------------------------------------------------------ do_commit | DIRTY_ZNODE[1] ubifs_tnc_start_commit | DIRTY_ZNODE get_znodes_to_commit | DIRTY_ZNODE | COW_ZNODE ubifs_tnc_end_commit | DIRTY_ZNODE | COW_ZNODE write_index | 0 write master | 0 or OBSOLETE_ZNODE[2] or | DIRTY_ZNODE[3]
[1] znode is dirtied without concurrent committing process [2] znode is copied up (re-dirtied by other process) before cleaned up in committing process [3] znode is re-dirtied after cleaned up in committing process
Currently, the clean znode count is updated in free_obsolete_znodes(), which is called only in normal path. If do_commit failed, clean znode count won't be updated, which triggers a failure ubifs assertion[4] in ubifs_tnc_close(): ubifs_assert_failed [ubifs]: UBIFS assert failed: freed == n
[4] Commit 380347e9ca7682 ("UBIFS: Add an assertion for clean_zn_cnt").
Fix it by re-statisticing cleaned znode count in tnc_destroy_cnext().
Fetch a reproducer in [Link].
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216704 Fixes: 1e51764a3c2a ("UBIFS: add new flash file system") Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com Signed-off-by: Richard Weinberger richard@nod.at Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Jialin Zhang zhangjialin11@huawei.com --- fs/ubifs/tnc.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+)
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index addee3fa0578..896f0ef1395a 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -3090,6 +3090,21 @@ static void tnc_destroy_cnext(struct ubifs_info *c) cnext = cnext->cnext; if (ubifs_zn_obsolete(znode)) kfree(znode); + else if (!ubifs_zn_cow(znode)) { + /* + * Don't forget to update clean znode count after + * committing failed, because ubifs will check this + * count while closing tnc. Non-obsolete znode could + * be re-dirtied during committing process, so dirty + * flag is untrustable. The flag 'COW_ZNODE' is set + * for each dirty znode before committing, and it is + * cleared as long as the znode become clean, so we + * can statistic clean znode count according to this + * flag. + */ + atomic_long_inc(&c->clean_zn_cnt); + atomic_long_inc(&ubifs_clean_zn_cnt); + } } while (cnext && cnext != c->cnext); }
From: Zhihao Cheng chengzhihao1@huawei.com
maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6D1L0 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit?i...
--------------------------------
After disabling fastmap(ubi->fm_disabled = 1), fastmap won't be updated, fm_anchor PEB is missed being scheduled for erasing. Besides, fm_anchor PEB may have smallest erase count, it doesn't participate wear-leveling. The difference of erase count between fm_anchor PEB and other PEBs will be larger and larger later on.
In which situation fastmap can be disabled? Initially, we have an UBI image with fastmap. Then the image will be atttached without module parameter 'fm_autoconvert', ubi turns to full scanning mode in one random attaching process(eg. bad fastmap caused by powercut), ubi fastmap is disabled since then.
Fix it by not getting fm_anchor if fastmap is disabled in ubi_refill_pools().
Fetch a reproducer in [Link].
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216341 Fixes: 4b68bf9a69d22d ("ubi: Select fastmap anchor PEBs considering ...") Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com Signed-off-by: Richard Weinberger richard@nod.at Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Jialin Zhang zhangjialin11@huawei.com --- drivers/mtd/ubi/fastmap-wl.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c index 053ab52668e8..69592be33adf 100644 --- a/drivers/mtd/ubi/fastmap-wl.c +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -146,13 +146,15 @@ void ubi_refill_pools(struct ubi_device *ubi) if (ubi->fm_anchor) { wl_tree_add(ubi->fm_anchor, &ubi->free); ubi->free_count++; + ubi->fm_anchor = NULL; }
- /* - * All available PEBs are in ubi->free, now is the time to get - * the best anchor PEBs. - */ - ubi->fm_anchor = ubi_wl_get_fm_peb(ubi, 1); + if (!ubi->fm_disabled) + /* + * All available PEBs are in ubi->free, now is the time to get + * the best anchor PEBs. + */ + ubi->fm_anchor = ubi_wl_get_fm_peb(ubi, 1);
for (;;) { enough = 0;
From: Zhihao Cheng chengzhihao1@huawei.com
maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6D1S7 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit?i...
--------------------------------
Wear-leveling entry could be freed in error path, which may be accessed again in eraseblk_count_seq_show(), for example:
__erase_worker eraseblk_count_seq_show wl = ubi->lookuptbl[*block_number] if (wl) wl_entry_destroy ubi->lookuptbl[e->pnum] = NULL kmem_cache_free(ubi_wl_entry_slab, e) erase_count = wl->ec // UAF!
Wear-leveling entry updating/accessing in ubi->lookuptbl should be protected by ubi->wl_lock, fix it by adding ubi->wl_lock to serialize wl entry accessing between wl_entry_destroy() and eraseblk_count_seq_show().
Fetch a reproducer in [Link].
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216305 Fixes: 7bccd12d27b7e3 ("ubi: Add debugfs file for tracking PEB state") Fixes: 801c135ce73d5d ("UBI: Unsorted Block Images") Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com Signed-off-by: Richard Weinberger richard@nod.at Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Jialin Zhang zhangjialin11@huawei.com --- drivers/mtd/ubi/wl.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index e2e70efc02fb..6da09263e0b9 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -885,8 +885,11 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
err = do_sync_erase(ubi, e1, vol_id, lnum, 0); if (err) { - if (e2) + if (e2) { + spin_lock(&ubi->wl_lock); wl_entry_destroy(ubi, e2); + spin_unlock(&ubi->wl_lock); + } goto out_ro; }
@@ -1121,14 +1124,18 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk) /* Re-schedule the LEB for erasure */ err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false); if (err1) { + spin_lock(&ubi->wl_lock); wl_entry_destroy(ubi, e); + spin_unlock(&ubi->wl_lock); err = err1; goto out_ro; } return err; }
+ spin_lock(&ubi->wl_lock); wl_entry_destroy(ubi, e); + spin_unlock(&ubi->wl_lock); if (err != -EIO) /* * If this is not %-EIO, we have no idea what to do. Scheduling